1. 程式人生 > >Python程式建立MongoDB資料庫集合的唯一索引

Python程式建立MongoDB資料庫集合的唯一索引

可以使用create_index或者ensure_index方法,兩種方法的基本語法相同。不過ensure_index自PyMongo 3.0起已被標記為棄用(呼叫時會發出DeprecationWarning,見下方原始碼),新程式碼建議一律使用create_index。
首先,連線資料庫中的目標集合:
col = MongoClient(the_client).get_database(the_db).get_collection(the_col)

然後,建立唯一索引,不加unique的話預設是普通的索引,即unique=False:

col.create_index([("索引欄位名", 1)], unique=True)

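其中的1和-1分別表示升序與降序排列(即pymongo.ASCENDING與pymongo.DESCENDING)。注意,需要指定排序方向時,索引要寫成由(欄位名, 方向)元組組成的列表,也就是要用中括號——[("索引", 1)];若只需要單一欄位的升序索引,也可以直接傳入欄位名字串。具體參見下方原始碼說明。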

例項:

# -*- coding:utf-8 -*-
# 給mongodb集合建立索引
from pymongo import MongoClient


def create_mongodb_index(the_data_client, the_data_db, the_data_cl, index_name, unique=False):
    """Create a single-field ascending index on a MongoDB collection.

    Progress messages are printed before and after the index is built.

    :Parameters:
      - `the_data_client`: host or connection URI handed to ``MongoClient``.
      - `the_data_db`: name of the database that holds the collection.
      - `the_data_cl`: name of the collection to index.
      - `index_name`: name of the document field to index (ascending).
      - `unique` (optional): when ``True`` the index is created with a
        uniqueness constraint; defaults to ``False`` (ordinary index).
    """
    data_client = MongoClient(the_data_client)
    try:
        data_db = data_client.get_database(the_data_db)
        data_col = data_db.get_collection(the_data_cl)

        # Single-argument print(...) yields the same text on Python 2 and 3.
        print("start, the index is: %s" % (index_name,))

        # ensure_index is deprecated in favour of create_index, which takes
        # the same key list and keyword options.
        data_col.create_index([(index_name, 1)], unique=unique)
        print("run over")
    finally:
        # Release the connection even when index creation fails.
        data_client.close()


if __name__ == '__main__':
    # Fill in the connection target, database, collection and the field to
    # index before running this script.
    DataClient, DataDB, DataCol = '', '', ''
    IndexName = ''

    create_mongodb_index(
        the_data_client=DataClient,
        the_data_db=DataDB,
        the_data_cl=DataCol,
        index_name=IndexName,
        unique=False
    )

附:原始碼

    def create_index(self, keys, session=None, **kwargs):
        """Build an index on this collection and return its name.

        ``keys`` may be one key or a list of ``(key, direction)`` pairs.
        Every key must be an instance of :class:`basestring`
        (:class:`str` in python 3). Every direction must be one of
        :data:`~pymongo.ASCENDING`, :data:`~pymongo.DESCENDING`,
        :data:`~pymongo.GEO2D`, :data:`~pymongo.GEOHAYSTACK`,
        :data:`~pymongo.GEOSPHERE`, :data:`~pymongo.HASHED` or
        :data:`~pymongo.TEXT`.

        A plain string is enough for a single key ascending index, e.g. on
        the key ``'mike'``::

          >>> my_collection.create_index("mike")

        A compound index, e.g. ``'mike'`` descending together with
        ``'eliot'`` ascending, is written as a list of tuples::

          >>> my_collection.create_index([("mike", pymongo.DESCENDING),
          ...                             ("eliot", pymongo.ASCENDING)])

        Every optional index creation parameter is given as a keyword
        argument::

          >>> my_collection.create_index([("mike", pymongo.DESCENDING)],
          ...                            background=True)

        Valid options include, but are not limited to:

          - `name`: custom name for the index; one is generated when it is
            not given.
          - `unique`: if ``True``, the index gets a uniqueness constraint.
          - `background`: if ``True``, the index should be built in the
            background.
          - `sparse`: if ``True``, documents lacking the indexed field are
            left out of the index.
          - `bucketSize`: for geoHaystack indexes; the number of documents
            to group together within a certain proximity to a given
            longitude and latitude.
          - `min`: minimum value for keys in a :data:`~pymongo.GEO2D`
            index.
          - `max`: maximum value for keys in a :data:`~pymongo.GEO2D`
            index.
          - `expireAfterSeconds`: <int> Creates an expiring (TTL)
            collection. MongoDB automatically deletes documents from the
            collection after <int> seconds. The indexed field must be a
            UTC datetime, otherwise the data will not expire.
          - `partialFilterExpression`: a document specifying the filter of
            a partial index.
          - `collation` (optional): an instance of
            :class:`~pymongo.collation.Collation`; only supported on
            MongoDB 3.4 and above.

        The MongoDB documentation has the full list of options supported
        by each server version.

        .. warning:: MongoDB 3.0 and newer do not support `dropDups`. The
          server silently ignores the option, and a unique index build
          that uses it fails if a duplicate value is detected.

        .. note:: `partialFilterExpression` requires server version **>= 3.2**

        .. note:: When using MongoDB >= 3.4, the
           :attr:`~pymongo.collection.Collection.write_concern` of this
           collection is automatically applied to this operation.

        :Parameters:
          - `keys`: a single key or a list of (key, direction) pairs
            describing the index to create
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession`.
          - `**kwargs` (optional): additional index creation options (see
            the list above), passed as keyword arguments

        :Returns:
          The name of the index: the `name` option when supplied,
          otherwise the generated name.

        .. versionchanged:: 3.6
           Added ``session`` parameter. Added support for passing maxTimeMS
           in kwargs.
        .. versionchanged:: 3.4
           Apply this collection's write concern automatically to this
           operation when connected to MongoDB >= 3.4. Support the
           `collation` option.
        .. versionchanged:: 3.2
            Added partialFilterExpression to support partial indexes.
        .. versionchanged:: 3.0
            Renamed `key_or_list` to `keys`. Removed the `cache_for` option.
            :meth:`create_index` no longer caches index names. Removed
            support for the drop_dups and bucket_size aliases.

        .. mongodoc:: indexes
        """
        index_keys = helpers._index_list(keys)
        # A caller-supplied name wins; otherwise the generated one is stored
        # in kwargs so it is sent along with the other index options.
        index_name = kwargs.setdefault(
            "name", helpers._gen_index_name(index_keys))

        # maxTimeMS is taken out of the index options and forwarded
        # separately as a keyword argument to the internal helper.
        command_extras = {}
        if "maxTimeMS" in kwargs:
            command_extras["maxTimeMS"] = kwargs.pop("maxTimeMS")

        self.__create_index(index_keys, kwargs, session, **command_extras)
        return index_name
    def __create_index(self, keys, index_options, session, **kwargs):
        """Send a ``createIndexes`` command for a single index.

        :Parameters:
          - `keys`: a list of tuples [(key, type), (key, type), ...]
          - `index_options`: a dict of index options. Its ``'collation'``
            entry, if any, is removed from the dict and validated here.
          - `session` (optional): a
            :class:`~pymongo.client_session.ClientSession`.
          - `**kwargs` (optional): extra fields merged into the command
            document itself.

        Raises :class:`ConfigurationError` when a collation is given but
        the connection's max wire version is below 5.
        """
        index_spec = {"key": helpers._index_document(keys)}
        # Collation is pulled out before the remaining options are merged,
        # so it is only added back once the wire version has been checked.
        collation = validate_collation_or_none(
            index_options.pop('collation', None))
        index_spec.update(index_options)

        with self._socket_for_writes() as sock_info:
            has_collation = collation is not None
            if has_collation and sock_info.max_wire_version < 5:
                raise ConfigurationError(
                    'Must be connected to MongoDB 3.4+ to use collations.')
            if has_collation:
                index_spec['collation'] = collation

            command = SON([('createIndexes', self.name),
                           ('indexes', [index_spec])])
            command.update(kwargs)
            self._command(
                sock_info,
                command,
                read_preference=ReadPreference.PRIMARY,
                codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
                write_concern=self._write_concern_for(session),
                session=session)
    def ensure_index(self, key_or_list, cache_for=300, **kwargs):
        """**DEPRECATED** - Ensures that an index exists on this collection.

        Emits a :class:`DeprecationWarning` on every call. The index is
        created only when its name is not already cached on the client; the
        name is then cached with ``cache_for``. Returns the index name when
        the index was sent to the server, ``None`` when it was cached.

        Raises :class:`TypeError` if ``cache_for`` is neither an integer
        nor a float.

        .. versionchanged:: 3.0
            **DEPRECATED**
        """
        warnings.warn("ensure_index is deprecated. Use create_index instead.",
                      DeprecationWarning, stacklevel=2)

        # The types supported by datetime.timedelta.
        if (not isinstance(cache_for, integer_types) and
                not isinstance(cache_for, float)):
            raise TypeError("cache_for must be an integer or float.")

        # Translate the legacy snake_case aliases to the server option names.
        for legacy_key, option_key in (("drop_dups", "dropDups"),
                                       ("bucket_size", "bucketSize")):
            if legacy_key in kwargs:
                kwargs[option_key] = kwargs.pop(legacy_key)

        index_keys = helpers._index_list(key_or_list)
        index_name = kwargs.setdefault(
            "name", helpers._gen_index_name(index_keys))

        # The cache check and the create/cache step below are not atomic.
        # A thread may find the index uncached and be preempted before it
        # creates and caches it, so several threads creating the same index
        # concurrently can each send it to the server. The only cost is
        # wasted round trips.
        database = self.__database
        if database.client._cached(database.name, self.__name, index_name):
            return None

        self.__create_index(index_keys, kwargs, session=None)
        database.client._cache_index(database.name, self.__name,
                                     index_name, cache_for)
        return index_name