1. 程式人生 > >(Django)對object.all()等大量資料的QuerySet限制記憶體使用

(Django)對object.all()等大量資料的QuerySet限制記憶體使用

問題

在django的使用中,經常會出現大量資料的遍歷操作,或者是對大量資料進行遍歷、遷移、更新,比如

# Illustrative loop: walk every User row and move the value of B into A.
for user in User.objects.all():
    user.A = user.B
    # Clear the field on the instance being visited; assigning to `User.B`
    # would overwrite the attribute on the model class itself.
    user.B = None

等種種情況。

在本地開發環境中,QuerySet物件最初的記憶體佔用非常小;但隨著業務量的增長,QuerySet物件在遍歷時會快取每個model_instance,all()返回的QuerySet會越來越大,最終可能耗盡記憶體,導致行程被託管服務提供商殺死。

解決方法:

import copy
from decimal import Decimal


class MemorySavingQuerysetIterator(object):
    """Iterate over a queryset in slices of at most ``max_obj_num`` objects.

    The wrapped queryset is never iterated directly. Instead, ``count()`` is
    called once and the queryset is then sliced ``[i:i + max_obj_num]`` for
    each offset, and the objects of each slice are yielded through
    ``iterator()``.

    Args:
        queryset: Object exposing ``count()``, slicing and, on the slice,
            ``iterator()`` (e.g. a Django ``QuerySet``).
        max_obj_num: Maximum number of objects fetched per slice.

    NOTE(review): slices are taken by offset, so the queryset presumably
    needs a stable ordering for every row to be visited exactly once --
    confirm against the callers.
    """

    def __init__(self, queryset, max_obj_num=1000):
        self._base_queryset = queryset
        self.max_obj_num = max_obj_num
        # _setup() is a generator function: no query runs until the first
        # call to next().
        self._generator = self._setup()

    def _setup(self):
        """Yield every object of the base queryset, one slice at a time."""
        total = self._base_queryset.count()
        # range() instead of xrange() so the class runs on Python 2 and 3.
        for start in range(0, total, self.max_obj_num):
            # By making a copy of the queryset and using that to actually
            # access the objects we ensure that there are only `max_obj_num`
            # objects in memory at any given time
            stop = start + self.max_obj_num
            smaller_queryset = copy.deepcopy(self._base_queryset)[start:stop]
            for obj in smaller_queryset.iterator():
                yield obj

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next object; raises StopIteration when exhausted."""
        return next(self._generator)

    # Python 2 spells the iterator protocol method `next`.
    next = __next__

呼叫:

# Usage: wrap the queryset so that only 100 objects are fetched at a time.
users = User.objects.all()
for user in MemorySavingQuerysetIterator(users, 100):
    pass

python mysql原生操作

import MySQLdb

class QuerySetIterator(object):
    """Iterate over the rows of a SQL query page by page.

    Each page is fetched by appending ``limit``/``offset`` to ``query`` and
    running it on ``cursor``; rows are yielded one at a time.

    Args:
        cursor: Object providing ``execute(sql)`` and ``fetchall()``.
        query: SQL text without a ``limit``/``offset`` clause. It is
            interpolated into the final statement as-is, so it must come
            from a trusted source.
        max_num: Number of rows requested per page.
    """

    def __init__(self, cursor, query, max_num):
        self.query = query
        self.max_num = max_num
        self._cursor = cursor
        # _setup() is a generator function: no query runs until the first
        # call to next().
        self._generator = self._setup()

    def _setup(self):
        """Yield every row of the query, fetching ``max_num`` rows per page."""
        offset = 0
        # Loop until the data runs out rather than up to a fixed offset cap,
        # so result sets of any size are read completely.
        while True:
            new_query = "{query} limit {limit} offset {offset}".format(
                query=self.query, limit=self.max_num, offset=offset
            )
            self._cursor.execute(new_query)
            result = self._cursor.fetchall()
            if not result:
                break
            for obj in result:
                yield obj
            # A short page is the last one; skip the extra empty query.
            if len(result) < self.max_num:
                break
            offset += self.max_num

    def __iter__(self):
        return self

    def __next__(self):
        """Return the next row; raises StopIteration when exhausted."""
        return next(self._generator)

    # Python 2 spells the iterator protocol method `next`.
    next = __next__


class TestModel(object):
    """Minimal table wrapper that returns paged iterators over its rows.

    Args:
        tb_name: Table name, interpolated as-is into the default
            ``select *`` statement; it must come from a trusted source.
        max_num: Number of rows fetched per page by ``query_all``.
    """

    # NOTE(review): the connection is opened when the class body executes and
    # uses hard-coded credentials; it is shared by every instance. Kept as
    # class attributes so existing `TestModel.db` / `TestModel.cursor` users
    # keep working.
    db = MySQLdb.connect("localhost", "root", "123456", "test")
    cursor = db.cursor()

    def __init__(self, tb_name, max_num=100):
        self.tb_name = tb_name
        self.max_num = max_num
        self._query_sql_tpl = "select * from {tb_name}".format(tb_name=tb_name)

    def query_all(self, query_sql=None):
        """Return a paged iterator over ``query_sql``.

        Args:
            query_sql: SQL text to run; when empty or ``None`` the default
                ``select * from <tb_name>`` statement is used.

        Returns:
            A ``QuerySetIterator`` bound to the shared cursor.
        """
        if not query_sql:
            query_sql = self._query_sql_tpl
        # The iterator class defined in this file is QuerySetIterator; the
        # name `QuerySet` does not exist here and raised NameError.
        return QuerySetIterator(self.cursor, query_sql, self.max_num)

# Demo: stream every row of table `test`, one page at a time.
test = TestModel('test')
result = test.query_all()

for obj in result:
    # Function-call form prints the same output on Python 2 for a single
    # argument and is also valid on Python 3.
    print(obj)