Creating a Unique Index on a MongoDB Collection with Python
阿新 • Published: 2018-12-03
You can use either the ensure_index or the create_index method; both take the same arguments. Note that ensure_index is deprecated in pymongo 3.x (see the source code at the end of this post), so create_index is the preferred choice.
First, connect to the target collection in the database:
col = MongoClient(the_client).get_database(the_db).get_collection(the_col)
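As a concrete illustration, against a local MongoDB instance the same one-liner could look like this (the URI, database name, and collection name are placeholders, not values from the original post):

from pymongo import MongoClient

# Hypothetical connection values for illustration only.
col = MongoClient("mongodb://localhost:27017/").get_database("test_db").get_collection("users")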
Then create the unique index. If the unique argument is omitted, an ordinary (non-unique) index is created, i.e. the default is unique=False:
col.create_index([("index_field_name", 1)], unique=True)
Here 1 and -1 mean ascending and descending order, respectively. Note that the keys must be passed as a list of (field, direction) tuples, e.g. [("field", 1)]; see the source code at the end of this post for details.
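For example, a compound unique index can mix both directions. A minimal sketch, reusing the col handle from above; the field names user_id and created_at are assumptions for illustration, and 1/-1 can also be written with the pymongo constants:

import pymongo

# 1 is pymongo.ASCENDING, -1 is pymongo.DESCENDING.
# The keys are passed as a list of (field, direction) tuples.
col.create_index(
    [("user_id", pymongo.ASCENDING), ("created_at", pymongo.DESCENDING)],
    unique=True,
)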
Example:
# -*- coding:utf-8 -*-
# Create an index on a MongoDB collection.
from pymongo import MongoClient


def create_mongodb_index(the_data_client, the_data_db, the_data_cl,
                         index_name, unique=False):
    data_client = MongoClient(the_data_client)
    data_db = data_client.get_database(the_data_db)
    data_col = data_db.get_collection(the_data_cl)
    print("start, the index is:", index_name)
    # ensure_index is deprecated in pymongo 3.x; create_index does the same job.
    data_col.create_index([(index_name, 1)], unique=unique)
    print("run over")


if __name__ == '__main__':
    DataClient = ''
    DataDB = ''
    DataCol = ''
    IndexName = ''
    create_mongodb_index(DataClient, DataDB, DataCol,
                         index_name=IndexName, unique=False)
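Once the unique index exists, inserting a document whose indexed value already appears in the collection is rejected by the server and pymongo raises pymongo.errors.DuplicateKeyError. A minimal sketch, reusing the col handle from above; the field name user_id is an assumption for illustration:

from pymongo.errors import DuplicateKeyError

col.insert_one({"user_id": 1})
try:
    col.insert_one({"user_id": 1})  # same indexed value again
except DuplicateKeyError as exc:
    print("duplicate rejected by the unique index:", exc)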
Appendix: pymongo source code
def create_index(self, keys, session=None, **kwargs):
    """Creates an index on this collection.

    Takes either a single key or a list of (key, direction) pairs.
    The key(s) must be an instance of :class:`basestring`
    (:class:`str` in python 3), and the direction(s) must be one of
    (:data:`~pymongo.ASCENDING`, :data:`~pymongo.DESCENDING`,
    :data:`~pymongo.GEO2D`, :data:`~pymongo.GEOHAYSTACK`,
    :data:`~pymongo.GEOSPHERE`, :data:`~pymongo.HASHED`,
    :data:`~pymongo.TEXT`).

    To create a single key ascending index on the key ``'mike'``
    we just use a string argument::

      >>> my_collection.create_index("mike")

    For a compound index on ``'mike'`` descending and ``'eliot'``
    ascending we need to use a list of tuples::

      >>> my_collection.create_index([("mike", pymongo.DESCENDING),
      ...                             ("eliot", pymongo.ASCENDING)])

    All optional index creation parameters should be passed as
    keyword arguments to this method. For example::

      >>> my_collection.create_index([("mike", pymongo.DESCENDING)],
      ...                            background=True)

    Valid options include, but are not limited to:

      - `name`: custom name to use for this index - if none is
        given, a name will be generated.
      - `unique`: if ``True`` creates a uniqueness constraint on the index.
      - `background`: if ``True`` this index should be created in the
        background.
      - `sparse`: if ``True``, omit from the index any documents that lack
        the indexed field.
      - `bucketSize`: for use with geoHaystack indexes. Number of documents
        to group together within a certain proximity to a given longitude
        and latitude.
      - `min`: minimum value for keys in a :data:`~pymongo.GEO2D` index.
      - `max`: maximum value for keys in a :data:`~pymongo.GEO2D` index.
      - `expireAfterSeconds`: <int> Used to create an expiring (TTL)
        collection. MongoDB will automatically delete documents from this
        collection after <int> seconds. The indexed field must be a UTC
        datetime or the data will not expire.
      - `partialFilterExpression`: A document that specifies a filter for
        a partial index.
      - `collation` (optional): An instance of
        :class:`~pymongo.collation.Collation`. This option is only
        supported on MongoDB 3.4 and above.

    See the MongoDB documentation for a full list of supported options by
    server version.

    .. warning:: `dropDups` is not supported by MongoDB 3.0 or newer. The
      option is silently ignored by the server and unique index builds
      using the option will fail if a duplicate value is detected.

    .. note:: `partialFilterExpression` requires server version **>= 3.2**

    .. note:: The :attr:`~pymongo.collection.Collection.write_concern` of
       this collection is automatically applied to this operation when
       using MongoDB >= 3.4.

    :Parameters:
      - `keys`: a single key or a list of (key, direction) pairs
        specifying the index to create
      - `session` (optional): a
        :class:`~pymongo.client_session.ClientSession`.
      - `**kwargs` (optional): any additional index creation options
        (see the above list) should be passed as keyword arguments

    .. versionchanged:: 3.6
       Added ``session`` parameter. Added support for passing maxTimeMS
       in kwargs.
    .. versionchanged:: 3.4
       Apply this collection's write concern automatically to this
       operation when connected to MongoDB >= 3.4. Support the
       `collation` option.
    .. versionchanged:: 3.2
        Added partialFilterExpression to support partial indexes.
    .. versionchanged:: 3.0
        Renamed `key_or_list` to `keys`. Removed the `cache_for` option.
        :meth:`create_index` no longer caches index names. Removed support
        for the drop_dups and bucket_size aliases.

    .. mongodoc:: indexes
    """
    keys = helpers._index_list(keys)
    name = kwargs.setdefault("name", helpers._gen_index_name(keys))
    cmd_options = {}
    if "maxTimeMS" in kwargs:
        cmd_options["maxTimeMS"] = kwargs.pop("maxTimeMS")
    self.__create_index(keys, kwargs, session, **cmd_options)
    return name
def __create_index(self, keys, index_options, session, **kwargs):
    """Internal create index helper.

    :Parameters:
      - `keys`: a list of tuples [(key, type), (key, type), ...]
      - `index_options`: a dict of index options.
      - `session` (optional): a
        :class:`~pymongo.client_session.ClientSession`.
    """
    index_doc = helpers._index_document(keys)
    index = {"key": index_doc}
    collation = validate_collation_or_none(
        index_options.pop('collation', None))
    index.update(index_options)

    with self._socket_for_writes() as sock_info:
        if collation is not None:
            if sock_info.max_wire_version < 5:
                raise ConfigurationError(
                    'Must be connected to MongoDB 3.4+ to use collations.')
            else:
                index['collation'] = collation
        cmd = SON([('createIndexes', self.name), ('indexes', [index])])
        cmd.update(kwargs)
        self._command(
            sock_info, cmd, read_preference=ReadPreference.PRIMARY,
            codec_options=_UNICODE_REPLACE_CODEC_OPTIONS,
            write_concern=self._write_concern_for(session),
            session=session)
def ensure_index(self, key_or_list, cache_for=300, **kwargs):
    """**DEPRECATED** - Ensures that an index exists on this collection.

    .. versionchanged:: 3.0
        **DEPRECATED**
    """
    warnings.warn("ensure_index is deprecated. Use create_index instead.",
                  DeprecationWarning, stacklevel=2)
    # The types supported by datetime.timedelta.
    if not (isinstance(cache_for, integer_types) or
            isinstance(cache_for, float)):
        raise TypeError("cache_for must be an integer or float.")

    if "drop_dups" in kwargs:
        kwargs["dropDups"] = kwargs.pop("drop_dups")

    if "bucket_size" in kwargs:
        kwargs["bucketSize"] = kwargs.pop("bucket_size")

    keys = helpers._index_list(key_or_list)
    name = kwargs.setdefault("name", helpers._gen_index_name(keys))

    # Note that there is a race condition here. One thread could
    # check if the index is cached and be preempted before creating
    # and caching the index. This means multiple threads attempting
    # to create the same index concurrently could send the index
    # to the server two or more times. This has no practical impact
    # other than wasted round trips.
    if not self.__database.client._cached(self.__database.name,
                                          self.__name, name):
        self.__create_index(keys, kwargs, session=None)
        self.__database.client._cache_index(self.__database.name,
                                            self.__name, name, cache_for)
        return name
    return None
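To confirm that an index was actually created, or to remove it again, pymongo's Collection.index_information() and Collection.drop_index() can be used. A minimal sketch, reusing the col handle from above; the index name "user_id_1" assumes the default name pymongo generates for a single ascending key called user_id:

# List all indexes on the collection; the dict keys are index names.
print(col.index_information())

# The default generated name is "<field>_<direction>", e.g. "user_id_1".
col.drop_index("user_id_1")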