vector原始碼剖析

阿新 • • 發佈：2019-02-12

vector

前導準備

原始碼位置
* C:\Program Files (x86)\Microsoft Visual Studio 10.0\VC\include

原始碼版本

Copyright (c) 1992-2009 by P.J. Plauger. ALL RIGHTS RESERVED.
Consult your license regarding permissions and restrictions.
V5.20:0009

類定義

前導定義

template<class _Ty,
    class _Ax = allocator<_Ty> >
    class 
 vector
        : public _Vector_val<_Ty, _Ax>
    {   // varying size array of values
public:
    typedef vector<_Ty, _Ax> _Myt;
    typedef _Vector_val<_Ty, _Ax> _Mybase;
    typedef typename _Mybase::_Alty _Alloc;

    typedef _Alloc allocator_type;
    typedef typename _Alloc::size_type size_type;
    typedef 
 typename _Alloc::difference_type difference_type;
    typedef typename _Alloc::pointer pointer;
    typedef typename _Alloc::const_pointer const_pointer;
    typedef typename _Alloc::reference reference;
    typedef typename _Alloc::const_reference const_reference;
    typedef typename _Alloc::value_type value_type;

    typedef 
 _Vector_iterator<_Mybase> iterator;
    typedef _Vector_const_iterator<_Mybase> const_iterator;

    typedef _STD reverse_iterator<iterator> reverse_iterator;
    typedef _STD reverse_iterator<const_iterator> const_reverse_iterator;

    .......

上面是vector 類開始處的程式碼片段。重點分析如下：
* allocator是專門的記憶體分配器，所有記憶體分配的相關內容都經由allocator來處理。
* 針對基類的分析，_Vector_val，這裡面儲存著vector的操作資料
* 形如typedef typename 的定義
* 迭代器iterator的定義

allocator

我們可以在vc\crt\src看到如下這個巨集定義，微軟的編譯器中預設這個_ALLOCATOR類是它的預設記憶體分配器，allocator是標準上的一個要求，當然我們可以實現不一樣的記憶體分配器。

定義

 #define _ALLOCATOR allocator

接著我們看看這個類的定義


        // TEMPLATE CLASS _ALLOCATOR
        // Only the nested type definitions are reproduced in this excerpt.
        // Each typedef below is derived from the base class's value_type or
        // from the _SIZT / _PDFT macros.
template<class _Ty>
    class _ALLOCATOR
        : public _Allocator_base<_Ty>
    {   // generic allocator for objects of class _Ty
public:
    typedef _Allocator_base<_Ty> _Mybase;   // base that supplies value_type
    typedef typename _Mybase::value_type value_type;

    // pointer and reference flavours of value_type; _FARQ is a qualifier
    // macro defined elsewhere (presumably empty here -- TODO confirm)
    typedef value_type _FARQ *pointer;
    typedef value_type _FARQ& reference;
    typedef const value_type _FARQ *const_pointer;
    typedef const value_type _FARQ& const_reference;

    // size and distance types come from macros defined elsewhere
    typedef _SIZT size_type;
    typedef _PDFT difference_type;

    };

依舊是使用typedef來定義出必要的型別，以備後續使用。
內部重要的函式有四個：構造（construct）、析構（destroy）、申請（allocate）和釋放（deallocate）。我們先看申請和釋放函式。

申請

// Allocator member: obtains storage for _Count elements by forwarding
// to the free function template _Allocate.
pointer allocate(size_type _Count)
    {   // allocate array of _Count elements
        // the null pointer argument is never dereferenced; it only lets
        // _Allocate deduce the element type from the pointer type
        return (_Allocate(_Count, (pointer)0));
    }

// Allocates raw, unconstructed storage for _Count objects of type _Ty.
// The second (unnamed) parameter exists only for type deduction.
// Returns a null pointer when _Count is 0; otherwise the block obtained
// from ::operator new. Invokes _THROW_NCEE(bad_alloc, 0) when the byte
// size would overflow _SIZT or when ::operator new yields 0.
template<class _Ty> inline
    _Ty _FARQ *_Allocate(_SIZT _Count, _Ty _FARQ *)
    {   // allocate storage for _Count elements of type _Ty
    void *_Ptr = 0;

    // zero elements: skip the allocation and return the null _Ptr
    if (_Count <= 0)
        _Count = 0;
    // first operand: reject counts for which _Count * sizeof (_Ty) would
    // not fit in _SIZT; second operand: perform the allocation and treat
    // a 0 result as failure
    else if (((_SIZT)(-1) / sizeof (_Ty) < _Count)
        || (_Ptr = ::operator new(_Count * sizeof (_Ty))) == 0)
        _THROW_NCEE(bad_alloc, 0);

    return ((_Ty _FARQ *)_Ptr);
    }

我們來詳細分析這個new的操作過程。

        || (_Ptr = ::operator new(_Count * sizeof (_Ty))) == 0)
69BF0DC4  cmp         dword ptr [_Count],1FFFFFFFh  
69BF0DCB  ja          std::_Allocate<std::pair<CString,CString> >+55h (69BF0DE5h)  
69BF0DCD  mov         eax,dword ptr [_Count]  
69BF0DD0  shl         eax,3  
69BF0DD3  push        eax  
69BF0DD4  call        operator new (69BDCC07h)  
69BF0DD9  add         esp,4  
69BF0DDC  mov         dword ptr [_Ptr],eax  
69BF0DDF  cmp         dword ptr [_Ptr],0  
69BF0DE3  jne         std::_Allocate<std::pair<CString,CString> >+73h (69BF0E03h)

在69BF0DD4進入的跳轉

operator new:
69BDCC07  jmp         operator new (69D5FD52h) 

69D5FD52  jmp         dword ptr [__imp_operator new (69DDA6F0h)]

void *__CRTDECL operator new(size_t size) _THROW1(_STD bad_alloc)
        {       // try to allocate size bytes
6EA57DA0  mov         edi,edi  
6EA57DA2  push        ebp  
6EA57DA3  mov         ebp,esp  
6EA57DA5  sub         esp,10h  
        void *p;
        while ((p = malloc(size)) == 0)
6EA57DA8  mov         eax,dword ptr [size]  
6EA57DAB  push        eax  
6EA57DAC  call        malloc (6EA69C40h)  
6EA57DB1  add         esp,4  
6EA57DB4  mov         dword ptr [p],eax  
6EA57DB7  cmp         dword ptr [p],0  
6EA57DBB  jne         operator new+79h (6EA57E19h)

這是用於申請記憶體的普通operator new：它內部通過malloc來申請記憶體，只有當malloc返回0時才會進入while迴圈體做失敗處理（迴圈體未在此列出）。

釋放

    // Allocator member: releases the storage at _Ptr with the global
    // operator delete. The element-count parameter is unnamed and unused,
    // and no destructor is invoked here.
    void deallocate(pointer _Ptr, size_type)
        {   // deallocate object at _Ptr, ignore size
        ::operator delete(_Ptr);
        }

申請函式會先檢查元素個數（為0時直接返回空指標，位元組數會溢位時拋出bad_alloc），然後呼叫::operator new來申請記憶體；釋放函式則不做任何判斷，直接呼叫::operator delete來釋放記憶體。

構造

然後我們來研究一下構造和解構函式。如下所示

// Allocator member: builds an object at _Ptr from _Val by forwarding to
// the free function template _Construct. No storage is allocated here.
void construct(pointer _Ptr, const _Ty& _Val)
    {   // construct object at _Ptr with value _Val
        _Construct(_Ptr, _Val);
    }
        // TEMPLATE FUNCTION _Construct
        // Two overloads that create a _Ty1 in storage the caller already
        // owns, using the placement form of the global operator new.
template<class _Ty1,
    class _Ty2> inline
    void _Construct(_Ty1 _FARQ *_Ptr, _Ty2&& _Val)
    {   // construct object at _Ptr with value _Val
    // placement new takes the target address as a void pointer
    void _FARQ *_Vptr = _Ptr;
    // forward keeps the value category of _Val when it is handed to the
    // _Ty1 constructor
    ::new (_Vptr) _Ty1(_STD forward<_Ty2>(_Val));
    }

template<class _Ty1> inline
    void _Construct(_Ty1 _FARQ *_Ptr)
    {   // construct object at _Ptr with default value
    void _FARQ *_Vptr = _Ptr;

    // empty parentheses: value-initialize the new object
    ::new (_Vptr) _Ty1();
    }

建構函式主要是通過placement new來進行操作，在指定地址處構造資料
我們來詳細分析一下這個::new (_Vptr) _Ty1(_STD forward<_Ty2>(_Val))的過程

我們來分析一下::new的彙編指令，來一探究竟。

    ::new (_Vptr) _Ty1(_STD forward<_Ty2>(_Val));
69BF16D3  mov         eax,dword ptr [_Vptr]  
69BF16D6  push        eax  
69BF16D7  push        8  
69BF16D9  call        operator new (69BE2AFDh)  
69BF16DE  add         esp,8  
69BF16E1  mov         dword ptr [ebp-0E0h],eax  
69BF16E7  mov         dword ptr [ebp-4],0  
69BF16EE  cmp         dword ptr [ebp-0E0h],0  
69BF16F5  je          std::_Construct<std::pair<CString,CString>,std::pair<CString,CString> const &>+87h (69BF1717h)  
69BF16F7  mov         ecx,dword ptr [_Val]  
69BF16FA  push        ecx  
69BF16FB  call        std::forward<std::pair<CString,CString> const &> (69BE1ECDh)  
69BF1700  add         esp,4  
69BF1703  push        eax  
69BF1704  mov         ecx,dword ptr [ebp-0E0h]  
69BF170A  call        std::pair<CString,CString>::pair<CString,CString> (69BDFB8Ch)  
69BF170F  mov         dword ptr [ebp-0F4h],eax  
69BF1715  jmp         std::_Construct<std::pair<CString,CString>,std::pair<CString,CString> const &>+91h (69BF1721h)  
69BF1717  mov         dword ptr [ebp-0F4h],0  
69BF1721  mov         edx,dword ptr [ebp-0F4h]  
69BF1727  mov         dword ptr [ebp-0ECh],edx  
69BF172D  mov         dword ptr [ebp-4],0FFFFFFFFh

我們在69BF16D9處進入call，看看它執行到了哪裡

operator new:
69BE2AFD  jmp         operator new (69BF2810h) 

inline void *__CRTDECL operator new(size_t, void *_Where) _THROW0()
    {   // construct array with placement at _Where
69BF2810  push        ebp  
69BF2811  mov         ebp,esp  
69BF2813  sub         esp,0C0h  
69BF2819  push        ebx  
69BF281A  push        esi  
69BF281B  push        edi  
69BF281C  lea         edi,[ebp-0C0h]  
69BF2822  mov         ecx,30h  
69BF2827  mov         eax,0CCCCCCCCh  
69BF282C  rep stos    dword ptr es:[edi]  
    return (_Where);
69BF282E  mov         eax,dword ptr [_Where]  
    }
69BF2831  pop         edi  
69BF2832  pop         esi  
69BF2833  pop         ebx  
69BF2834  mov         esp,ebp  
69BF2836  pop         ebp  
69BF2837  ret

如上，這是placement版本的operator new，函式中只是直接返回引數_Where所代表的地址，並不申請任何記憶體。
然後返回上一個函式中，此時呼叫一下forward，接著呼叫某種型別的建構函式，我們的是pair的建構函式。

析構

// Allocator member: ends the lifetime of the object at _Ptr by
// forwarding to _Destroy. The storage itself is not released here.
void destroy(pointer _Ptr)
    {   // destroy object at _Ptr
        _Destroy(_Ptr);
    }

    // TEMPLATE FUNCTION _Destroy
    // Primary template: explicitly invokes the destructor of *_Ptr.
template<class _Ty> inline
    void _Destroy(_Ty _FARQ *_Ptr)
    {   // destroy object at _Ptr
    _Ptr->~_Ty();
    }
// Explicit (full) specialization for char: empty body.
template<> inline
    void _Destroy(char _FARQ *)
    {   // destroy a char (do nothing)
    }

// Explicit (full) specialization for wchar_t: empty body.
template<> inline
    void _Destroy(wchar_t _FARQ *)
    {   // destroy a wchar_t (do nothing)
    }

解構函式則是呼叫物件的解構函式，另外針對char和wchar_t提供了什麼都不做的特化版本。注意此處使用的是函式模板的全特化（顯式特化，即template<>）；C++並不支援函式模板的偏特化。

_Vector_val基類

        // TEMPLATE CLASS _Vector_val
        // Holds the three pointers that describe the element storage and
        // the allocator object. The constructor/destructor pair differs
        // depending on _ITERATOR_DEBUG_LEVEL.
template<class _Ty,
    class _Alloc>
    class _Vector_val
        : public _Container_base
    {   // base class for vector to hold data
public:
    // the supplied allocator rebound to the element type _Ty
    typedef typename _Alloc::template rebind<_Ty>::other _Alty;

 #if _ITERATOR_DEBUG_LEVEL == 0
    // Level 0: copy the allocator and start with all three pointers null.
    _Vector_val(_Alloc _Al = _Alloc())
        : _Alval(_Al)
        {   // construct allocator from _Al
        _Myfirst = 0;
        _Mylast = 0;
        _Myend = 0;
        }

    // Level 0: nothing to release in this class.
    ~_Vector_val()
        {   // destroy proxy
        }

 #else /* _ITERATOR_DEBUG_LEVEL == 0 */
    // Non-zero level: additionally allocate one _Container_proxy, construct
    // it, and point its _Mycont back at this container.
    _Vector_val(_Alloc _Al = _Alloc())
        : _Alval(_Al)
        {   // construct allocator from _Al
        // allocator for the proxy object, rebound from the value allocator
        typename _Alloc::template rebind<_Container_proxy>::other
            _Alproxy(_Alval);
        this->_Myproxy = _Alproxy.allocate(1);
        _Cons_val(_Alproxy, this->_Myproxy, _Container_proxy());
        this->_Myproxy->_Mycont = this;

        _Myfirst = 0;
        _Mylast = 0;
        _Myend = 0;
        }

    // Non-zero level: undo the constructor's proxy setup.
    ~_Vector_val()
        {   // destroy proxy
        typename _Alloc::template rebind<_Container_proxy>::other
            _Alproxy(_Alval);
        // _Orphan_all is not shown here -- presumably detaches iterators
        // still registered with this container; TODO confirm
        this->_Orphan_all();
        // destroy the proxy object, then return its storage
        _Dest_val(_Alproxy, this->_Myproxy);
        _Alproxy.deallocate(this->_Myproxy, 1);
        this->_Myproxy = 0;
        }
 #endif /* _ITERATOR_DEBUG_LEVEL == 0 */

    // nested types taken from the rebound allocator
    typedef typename _Alty::size_type size_type;
    typedef typename _Alty::difference_type difference_type;
    typedef typename _Alty::pointer pointer;
    typedef typename _Alty::const_pointer const_pointer;
    typedef typename _Alty::reference reference;
    typedef typename _Alty::const_reference const_reference;
    typedef typename _Alty::value_type value_type;

    pointer _Myfirst;   // pointer to beginning of array
    pointer _Mylast;    // pointer to current end of sequence
    pointer _Myend; // pointer to end of array
    _Alty _Alval;   // allocator object for values
    };

這個基類中主要儲存的是vector的資料操作指標，重要的是下面這三個指標成員

pointer _Myfirst;   // pointer to beginning of array
pointer _Mylast;    // pointer to current end of sequence
pointer _Myend; // pointer to end of array

其他函式對其進行初始化和銷燬的處理，根據debug級別進行不同的處理，也使用了typedef來定義出具體的型別。

iterator

vector的迭代器分兩種四類

typedef _Vector_iterator<_Mybase> iterator;
typedef _Vector_const_iterator<_Mybase> const_iterator;
typedef _STD reverse_iterator<iterator> reverse_iterator;
typedef _STD reverse_iterator<const_iterator> const_reverse_iterator;

分是否const和正向或反向迭代器

vector 迭代器

_Vector_iterator定義

template<class _Myvec>
    class _Vector_iterator
        : public _Vector_const_iterator<_Myvec>
    {   // iterator for mutable vector
public:
    typedef _Vector_iterator<_Myvec> _Myiter;
    typedef _Vector_const_iterator<_Myvec> _Mybase;
    typedef random_access_iterator_tag iterator_category;

    typedef typename _Myvec::value_type value_type;
    typedef typename _Myvec::difference_type difference_type;
    typedef typename _Myvec::pointer pointer;
    typedef typename _Myvec::reference reference;

    _Vector_iterator()
        {   // construct with null vector pointer
        }

    _Vector_iterator(pointer _Parg, const _Container_base *_Pvector)
        : _Mybase(_Parg, _Pvector)
        {   // construct with pointer _Parg
        }

定義中，有宣告出此迭代器的型別random_access_iterator_tag，決定了當前迭代器能做的操作。資料型別value_type等等。以及定義出建構函式。
我們可以看看vector中的迭代器的構造，就可以知道vector向迭代器傳遞了哪些資料。

typedef _Vector_iterator<_Mybase> iterator;

// Builds an iterator from the first-element pointer and this container;
// both values are handed to the iterator's two-argument constructor.
iterator begin()
{   
    // return iterator for beginning of mutable sequence
    return (iterator(this->_Myfirst, this));
}

vector向迭代器傳遞了資料操作首地址和類操作this指標，將資料操作權遞交給迭代器。_Vector_iterator將資料指標傳遞給基類儲存。

_Vector_iterator對資料的操作

    ....
    // Returns a copy of this iterator moved back by _Off positions;
    // *this is left unchanged because the subtraction is applied to a copy.
    _Myiter operator-(difference_type _Off) const
        {   // return this - integer
        _Myiter _Tmp = *this;
        return (_Tmp -= _Off);
        }

    // Distance between two iterators. The cast views *this as the base
    // iterator type so that the base class's operator- does the work.
    difference_type operator-(const _Mybase& _Right) const
        {   // return difference of iterators
        return (*(_Mybase *)this - _Right);
        }

    // Element access at offset _Off: advance a temporary by _Off and
    // dereference it.
    reference operator[](difference_type _Off) const
        {   // subscript
        return (*(*this + _Off));
        }
    ....

那麼它是如何對資料的操作的呢，如上所示，返回的型別就是類開始處已經定義好的型別之一。通過迭代器來操作具體容器的資料。如何操作以及型別都由迭代器做具體的處理。型別由typedef迭代器的時候指定要操作資料的型別，而如何操作則由具體的迭代器定義來定義出何種的訪問方式。

vector迭代器基類

_Vector_iterator 繼承自 _Vector_const_iterator 繼承自 _Iterator012

        // TEMPLATE CLASS iterator
        // Carries no data or functions: it only republishes its template
        // arguments as nested type names. _Diff, _Pointer and _Reference
        // default to ptrdiff_t, _Ty * and _Ty&.
template<class _Category,
    class _Ty,
    class _Diff = ptrdiff_t,
    class _Pointer = _Ty *,
    class _Reference = _Ty&>
    struct iterator
    {   // base type for all iterator classes
    typedef _Category iterator_category;
    typedef _Ty value_type;
    typedef _Diff difference_type;
    typedef _Diff distance_type;    // retained
    typedef _Pointer pointer;
    typedef _Reference reference;
    };

// Same set of nested type names as struct iterator, but with no default
// template arguments and with an extra _Base parameter that this struct
// publicly derives from.
template<class _Category,
    class _Ty,
    class _Diff,
    class _Pointer,
    class _Reference,
    class _Base>
    struct _Iterator012
        : public _Base
    {
    typedef _Category iterator_category;
    typedef _Ty value_type;
    typedef _Diff difference_type;
    typedef _Diff distance_type;    // retained
    typedef _Pointer pointer;
    typedef _Reference reference;
    };

這裡是迭代器的最基本的定義。

template<class _Myvec>
    class _Vector_const_iterator
        : public _Iterator012<random_access_iterator_tag,
            typename _Myvec::value_type,
            typename _Myvec::difference_type,
            typename _Myvec::const_pointer,
            typename _Myvec::const_reference,
            _Iterator_base>
    {   // iterator for nonmutable vector
public:
.........

我們的vector迭代器如上繼承，指定它是一個隨機存取迭代器（random access iterator），可以隨機存取資料，並指定其他四項資料型別，供索引資料使用。

iterator_traits

我們繼續看vector的定義，接著我們看到了建構函式，其中有這麼一幕。

    ......
    // Two-argument template constructor. The tag object returned by
    // _Iter_cat(_First) selects which _Construct overload handles the
    // arguments.
    template<class _Iter>
        vector(_Iter _First, _Iter _Last)
        : _Mybase()
        {   // construct from [_First, _Last)
        _Construct(_First, _Last, _Iter_cat(_First));
        }

    // Overload chosen when the tag is _Int_iterator_tag: the two arguments
    // are treated as (count, value) rather than as a range.
    template<class _Iter>
        void _Construct(_Iter _Count, _Iter _Val, _Int_iterator_tag)
        {   // initialize with _Count * _Val
        // convert the arguments to the container's size and element types
        size_type _Size = (size_type)_Count;
        _Ty _Newval = (_Ty)_Val;
        // _Construct_n is not shown here -- presumably fills the vector
        // with _Size copies of the pointed-to value; TODO confirm
        _Construct_n(_Size, _STD addressof(_Newval));
        }

    // Overload chosen for input_iterator_tag: inserts [_First, _Last) at
    // begin(). If the insert fails, _Tidy() is called before _RERAISE.
    // NOTE(review): the _TRY_BEGIN/_CATCH_ALL/_RERAISE/_CATCH_END macros
    // are defined elsewhere -- presumably try / catch (...) / throw; confirm.
    template<class _Iter>
        void _Construct(_Iter _First,
            _Iter _Last, input_iterator_tag)
        {   // initialize with [_First, _Last), input iterators
        _TRY_BEGIN
        insert(begin(), _First, _Last);
        _CATCH_ALL
        _Tidy();
        _RERAISE;
        _CATCH_END
        }
    ......

vector 支援各種建構函式，值得一說的是上面的這種構造方式，使用_Iter_cat函式取出_First對應的迭代器型別，根據不同的迭代器型別，執行不同的構造演算法。下面我們來看看是如何通過迭代器獲取到迭代器型別的，這是一個型別識別的過程。

        // TEMPLATE FUNCTION _Iter_cat
        // Maps an iterator argument to an object of its category tag type.
        // The argument's value is never read (the parameter is unnamed);
        // only its type is used, through iterator_traits.
template<class _Iter> inline
    typename iterator_traits<_Iter>::iterator_category
        _Iter_cat(const _Iter&)
    {   // return category from iterator argument
    // the returned tag object exists only so callers can overload on its type
    typename iterator_traits<_Iter>::iterator_category _Cat;
    return (_Cat);
    }

內部主要使用iterator_traits來做核心任務，這個是迭代器型別識別的萃取類。其如下定義

        // TEMPLATE CLASS iterator_traits
        // Primary template: forwards every nested type from _Iter itself,
        // so _Iter must define iterator_category, value_type, etc.
template<class _Iter>
    struct iterator_traits
    {   // get traits from iterator _Iter
    typedef typename _Iter::iterator_category iterator_category;
    typedef typename _Iter::value_type value_type;
    typedef typename _Iter::difference_type difference_type;
    typedef difference_type distance_type;  // retained
    typedef typename _Iter::pointer pointer;
    typedef typename _Iter::reference reference;
    };

// Partial specialization for raw pointers, which have no nested types:
// they are described as random-access iterators over _Ty.
template<class _Ty>
    struct iterator_traits<_Ty *>
    {   // get traits from pointer
    typedef random_access_iterator_tag iterator_category;
    typedef _Ty value_type;
    typedef ptrdiff_t difference_type;
    typedef ptrdiff_t distance_type;    // retained
    typedef _Ty *pointer;
    typedef _Ty& reference;
    };

// Explicit specializations for integer types: they define only
// iterator_category, as _Int_iterator_tag.
template<> struct iterator_traits<_Bool>
    {   // get traits from integer type
    typedef _Int_iterator_tag iterator_category;
    };

template<> struct iterator_traits<char>
    {   // get traits from integer type
    typedef _Int_iterator_tag iterator_category;
    };
    .....

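iterator_traits 提供了多種特化的版本，如上，支援原始指標和迭代器的型別。還支援bool， char，等等資料型別的型別識別：這些整數型別的iterator_category被定義為_Int_iterator_tag，這樣當兩個引數是同一種整數型別時（例如vector<int>(10, 5)），_Construct就會選擇「_Count個_Val」的版本，而不會把它們當作迭代器區間。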

vector容器自身對資料的操作

push_back

......
    // Constructs a new element from _Val at _Mylast and advances _Mylast.
    // The two branches differ only in where the source value is read from.
    void push_back(const _Ty& _Val)
        {   // insert element at end
        // _Inside is defined elsewhere -- presumably true when the address
        // falls within this vector's current elements; TODO confirm
        if (_Inside(_STD addressof(_Val)))
            {   // push back an element
            // remember the element's index now: _Reserve may switch to new
            // storage, after which the reference _Val must not be used
            size_type _Idx = _STD addressof(_Val) - this->_Myfirst;
            // no unused capacity left: make room for one more element
            if (this->_Mylast == this->_Myend)
                _Reserve(1);
            // NOTE(review): _Orphan_range is not shown; looks like
            // iterator-debugging bookkeeping -- confirm
            _Orphan_range(this->_Mylast, this->_Mylast);
            // re-read the source through the (possibly new) _Myfirst
            _Cons_val(this->_Alval,
                this->_Mylast,
                this->_Myfirst[_Idx]);
            ++this->_Mylast;
            }
        else
            {   // push back a non-element
            // same steps, but the source is used directly through _Val
            if (this->_Mylast == this->_Myend)
                _Reserve(1);
            _Orphan_range(this->_Mylast, this->_Mylast);
            _Cons_val(this->_Alval,
                this->_Mylast,
                _Val);
            ++this->_Mylast;
            }
        }

push_back 的邏輯較為複雜：
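* 首先，判斷要插入的值的地址是否位於vector已有資料的地址範圍內（_Inside）。
* 如果是，先記下該值在vector中的下標_Idx；因為後面的_Reserve可能重新分配記憶體，原來的引用_Val會失效，所以構造時改用this->_Myfirst[_Idx]來取值。
* 如果這是一個外部的值，則直接使用_Val來構造。
* 兩種情況下都要先判斷是否還有可用空間（_Mylast == _Myend表示已滿），如果沒有，那麼使用_Reserve(1)來申請空間。
* 空間足夠之後（無論是否剛擴容），使用_Cons_val在_Mylast處構造新元素。
* 最後遞增_Mylast，即vector的隊尾指標。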
接著我們分別介紹其中幾個核心的函式

_Reserve

那麼，vector是如何預留空間的呢

.....
    // Makes sure there is capacity for _Count more elements beyond the
    // current size; reallocates through reserve() only when needed.
    void _Reserve(size_type _Count)
        {   // ensure room for _Count new elements, grow exponentially
        size_type _Size = size();
        // written as a subtraction so that the check itself cannot overflow:
        // true when size + _Count would exceed max_size()
        if (max_size() - _Count < _Size)
            // _Xlen is not shown -- presumably reports a length error; confirm
            _Xlen();
        // _Size now holds the required total; existing capacity is enough
        else if ((_Size += _Count) <= capacity())
            ;
        else
            // grow to whatever _Grow_to computes for the required total
            reserve(_Grow_to(_Size));
        }

        // Returns the largest element count whose total byte size still
        // fits in _SIZT, but never less than 1.
        _SIZT max_size() const _THROW0()
        {   // estimate maximum array size
        // (_SIZT)(-1) is the largest _SIZT value -- assumes _SIZT is an
        // unsigned type; TODO confirm
        _SIZT _Count = (_SIZT)(-1) / sizeof (_Ty);
        return (0 < _Count ? _Count : 1);
        }

此函式做引數合法判斷，確定不能超過最大大小，並且如果已經有容量符合要求了，那麼什麼都不做，如果容量確實不夠，那麼增長容量。
那麼具體容量是怎麼增長的，有個什麼規則呢

.....
    // Computes the capacity to grow to: 1.5x the current capacity, or
    // _Count if that is larger (or if 1.5x would exceed max_size()).
    size_type _Grow_to(size_type _Count) const
        {   // grow by 50% or at least to _Count
        size_type _Capacity = capacity();

        // when adding 50% would pass max_size(), use 0 so the comparison
        // below falls back to _Count
        _Capacity = max_size() - _Capacity / 2 < _Capacity
            ? 0 : _Capacity + _Capacity / 2;    // try to grow by 50%
        if (_Capacity < _Count)
            _Capacity = _Count;
        return (_Capacity);
        }

從上面的程式碼我們可以看出，容量每次嘗試增長50%；如果增長50%後仍小於所需的_Count，就直接取_Count；如果增長50%會超過max_size()，則先置為0，最終同樣取_Count。這是vs2010 stl這個版本的實現，其他的實現可能不同。
我們知道了容量的增量了，那麼具體它怎麼實現記憶體的操作的呢。

......
    // Raises the capacity to at least _Count elements. Does nothing when
    // the current capacity already suffices; calls _Xlen() when _Count
    // exceeds max_size().
    void reserve(size_type _Count)
        {   // determine new minimum length of allocated storage
        if (max_size() < _Count)
            _Xlen();    // result too long
        else if (capacity() < _Count)
            {   // not enough room, reallocate
            // new block first; the old one stays intact until the transfer
            // below has succeeded
            pointer _Ptr = this->_Alval.allocate(_Count);

            // _Umove is not shown -- presumably transfers the existing
            // elements into the new block; TODO confirm. On failure the
            // new block is given back before _RERAISE.
            _TRY_BEGIN
            _Umove(this->_Myfirst, this->_Mylast, _Ptr);
            _CATCH_ALL
            this->_Alval.deallocate(_Ptr, _Count);
            _RERAISE;
            _CATCH_END

            // element count, taken before the pointers are switched
            size_type _Size = size();
            if (this->_Myfirst != 0)
                {   // destroy and deallocate old array
                _Destroy(this->_Myfirst, this->_Mylast);
                this->_Alval.deallocate(this->_Myfirst,
                    this->_Myend - this->_Myfirst);
                }

            // point the three members at the new block
            this->_Orphan_all();
            this->_Myend = _Ptr + _Count;
            this->_Mylast = _Ptr + _Size;
            this->_Myfirst = _Ptr;
            }
        }

這裡先申請一塊可容納_Count個元素的新空間，通過_Umove把原有元素轉移到新空間（若過程中拋出異常，則釋放新空間並重新拋出），然後將原始空間中的元素析構並釋放原始空間，最後更新_Myfirst、_Mylast、_Myend三個指標。

_Cons_val

回到push_back函式中，當申請空間等操作都完成後，開始在這塊空間上構造資料。

// Constructs an object at _Pdest from _Src by delegating to the
// allocator's construct member; _Src is passed on with std::forward so
// its value category is preserved.
template<class _Alloc,
    class _Ty1,
    class _Ty2>
    void _Cons_val(_Alloc& _Alval, _Ty1 *_Pdest, _Ty2&& _Src)
    {   // construct using allocator
    _Alval.construct(_Pdest, _STD forward<_Ty2>(_Src));
    }

實際上是呼叫vector的記憶體分配器去做實際的操作。這個在開始allocator中我們就接觸到了如何構造析構和申請釋放記憶體的操作。