如何提高MySQL資料庫子查詢的效率
做django的專案,用python語言寫查詢資料庫的語句,使用OuterRef, Subquery,Exists,F建立複雜的查詢語句:
資料庫模型:
from django.db import models


class LngLat(models.Model):
    """Table of unique longitude/latitude pairs."""

    lng = models.DecimalField(max_digits=10, decimal_places=7, default=0.0)
    lat = models.DecimalField(max_digits=9, decimal_places=7, default=0.0)

    class Meta:
        # A given (lng, lat) pair may be stored only once.
        unique_together = ("lng", "lat")

    def __str__(self):
        return str(self.lng) + "," + str(self.lat)


class ZJenb(models.Model):
    """Zhejiang eNB (base station) record."""

    enbname = models.CharField(max_length=100, default=None)
    enbid = models.IntegerField()


class ZJcell(models.Model):
    """Zhejiang newly-built-site cell record."""

    # ECI (28 bits) = eNB ID (20 bits) + Cell ID (8 bits)
    # (Cell ID is the logical cell id, range 0-255).
    eci = models.IntegerField(primary_key=True, default=None)
    cgi = models.CharField(max_length=100, default=None, unique=True)
    cellid = models.IntegerField(default=None)
    cellname = models.CharField(max_length=100, default=None)
    # Frequency band, e.g. F1, F2, D1, D2, D3.
    freq = models.CharField(max_length=100, default=None)
    pci = models.IntegerField()
    lnglat = models.ForeignKey(LngLat, on_delete=models.CASCADE, default=None)
    # Antenna azimuth.
    azimuth = models.IntegerField()
    # Mod-3 value.
    m3 = models.IntegerField(null=True)
    # Neighbour relation. It is a one-way relation: reverse lookups are
    # possible, but that does not make the relation bidirectional.
    # (symmetrical=True would mean adding the forward relation also adds
    # the reverse one automatically.)
    adj = models.ManyToManyField(
        "self",
        symmetrical=False,
        through='ZJadjacent',
        through_fields=('cellfrom', 'cellto'),
    )
    enb = models.ForeignKey(ZJenb, on_delete=models.CASCADE, default=None, null=True)

    def __str__(self):
        return self.cellname


class ZJadjacent(models.Model):
    """Through table for the many-to-many neighbour relation between cells."""

    # There are two foreign keys to ZJcell, so each needs its own
    # related_name; otherwise Django reports error fields.E304.
    cellfrom = models.ForeignKey(
        ZJcell, on_delete=models.CASCADE, default=None, related_name='adj_from_cell'
    )
    cellto = models.ForeignKey(
        ZJcell, on_delete=models.CASCADE, default=None, related_name='adj_to_cell'
    )
    # Distance between the two neighbouring cells.
    distance = models.IntegerField(default=None)
    # Azimuth of the target cell as seen from the source cell.
    azi1 = models.IntegerField(default=None)
    # Azimuth of the source cell as seen from the target cell.
    azi2 = models.IntegerField(default=None)
    # Whether the reverse neighbour relation exists; NULL until computed.
    # BooleanField(null=True) replaces the deprecated NullBooleanField, as
    # the Django documentation recommends.
    # NOTE(review): expect makemigrations to emit an AlterField for this
    # change - verify before deploying.
    has_inverse_adj = models.BooleanField(null=True)
    # Unique identifier of the (source cell, target cell) pair; prevents
    # duplicate rows in the table.
    # Passing null=False explicitly makes no difference because False is
    # already the default (makemigrations reports "No changes detected").
    # Alternative string form, "source cgi&target cgi", e.g.
    # 460-00-325632-129&460-00-325632-130:
    # models.CharField(max_length=100, default=None, unique=True)
    source_target_cgi = models.BigIntegerField(primary_key=True, default=None)

    def __str__(self):
        return str(self.source_target_cgi)
解釋一下:
一共4張表,小區表ZJcell是主表,有兩個外來鍵(經緯度表LngLat,基站表ZJenb),還有一個多對多關係(鄰區ZJadjacent),其中ZJcell有31965條資料,ZJadjacent有838118條資料。
需要提取ZJadjacent鄰區關係中,只有單向鄰區的資料或有雙向鄰區的資料,將其資料中預設為空的has_inverse_adj項分別設定為False或True。
根據django官網子查詢的例子,寫了一個子查詢邏輯,用以判斷每條鄰區資料是否存在反向鄰區:
from django.db.models import Exists, F, OuterRef, Subquery

from dbbackend.models import LngLat, ZJadjacent, ZJcell, ZJenb

# Inner queryset: for every candidate row build the "reversed" key
# (cellto * 2**32 + cellfrom) and keep the rows whose reversed key equals
# the outer row's source_target_cgi.
inverseadj = (
    ZJadjacent.objects
    .annotate(iadj=2**32 * F('cellto_id') + F('cellfrom_id'))
    .filter(iadj=OuterRef('source_target_cgi'))
)

# Outer queryset: attach the matching reverse row's key (if any) to each
# adjacency row as `equaladj`.
symmetricaladj = ZJadjacent.objects.annotate(
    equaladj=Subquery(inverseadj.values('source_target_cgi'))
)
解釋一下:
ZJadjacent原表中的source_target_cgi欄位(列)表示(源小區eci乘以2的32次方+目標小區eci)的和,想要與此表中其他條目的(目標eci乘以2的32次方+源小區eci)的和進行比對,如果能找出相等的項,說明存在雙向鄰區,否則只存在單向鄰區。
由於(目標eci乘以2的32次方+源小區eci)在原表中不存在,需要在比對前新增這一列,於是使用語句:
annotate(iadj=2**32*F('cellto_id') + F('cellfrom_id'))
等效sql語言:
-- SQL shown for the first (Subquery) attempt.
-- The inner SELECT is correlated: its WHERE clause compares an expression
-- computed from U0's columns with the outer row's source_target_cgi.
-- NOTE(review): the Python snippet narrows the subquery with
-- .values('source_target_cgi'), which would select a single column; this
-- listing selects every column - confirm which queryset produced it.
SELECT
    `dbbackend_zjadjacent`.`cellfrom_id`,
    `dbbackend_zjadjacent`.`cellto_id`,
    `dbbackend_zjadjacent`.`distance`,
    `dbbackend_zjadjacent`.`azi1`,
    `dbbackend_zjadjacent`.`azi2`,
    `dbbackend_zjadjacent`.`has_inverse_adj`,
    `dbbackend_zjadjacent`.`source_target_cgi`,
    (SELECT U0.`cellfrom_id`,
        U0.`cellto_id`,
        U0.`distance`,
        U0.`azi1`,
        U0.`azi2`,
        U0.`has_inverse_adj`,
        U0.`source_target_cgi`,
        ((4294967296 * U0.`cellto_id`) + U0.`cellfrom_id`) AS `iadj`
    FROM `dbbackend_zjadjacent` U0
    WHERE
        ((4294967296 * U0.`cellto_id`) + U0.`cellfrom_id`) =
        (`dbbackend_zjadjacent`.`source_target_cgi`)
    ) AS `equaladj`
FROM `dbbackend_zjadjacent`
實際查詢時,發現效率很低,只是計數symmetricaladj.count()都很慢。
第一次改進,使用Exists()函式:
from django.db.models import Exists, F, OuterRef, Subquery

from dbbackend.models import LngLat, ZJadjacent, ZJcell, ZJenb

# Inner queryset: compute the reversed key (cellto * 2**32 + cellfrom) on
# each candidate row and match it against the outer row's source_target_cgi.
inverseadj = (
    ZJadjacent.objects
    .annotate(iadj=2**32 * F('cellto_id') + F('cellfrom_id'))
    .filter(iadj=OuterRef('source_target_cgi'))
)

# Outer queryset: `equaladj` is a boolean telling whether a reverse row exists.
symmetricaladjsource = ZJadjacent.objects.annotate(
    equaladj=Exists(inverseadj)
)
等效sql:
-- SQL for the first Exists() attempt.
-- Outer query: every adjacency row plus a boolean `equaladj`.
SELECT
`dbbackend_zjadjacent`.`cellfrom_id`,
`dbbackend_zjadjacent`.`cellto_id`,
`dbbackend_zjadjacent`.`distance`,
`dbbackend_zjadjacent`.`azi1`,
`dbbackend_zjadjacent`.`azi2`,
`dbbackend_zjadjacent`.`has_inverse_adj`,
`dbbackend_zjadjacent`.`source_target_cgi`,
-- Correlated subquery: the reversed key is computed from U0's columns ...
EXISTS(SELECT U0.`cellfrom_id`,
U0.`cellto_id`,
U0.`distance`,
U0.`azi1`, U0.`azi2`,
U0.`has_inverse_adj`,
U0.`source_target_cgi`,
((4294967296 * U0.`cellto_id`) + U0.`cellfrom_id`) AS `iadj`
FROM `dbbackend_zjadjacent` U0
WHERE
-- ... and compared with the outer row's stored key.
-- NOTE(review): presumably the computed left-hand side prevents an index
-- lookup on U0 - verify with EXPLAIN.
((4294967296 * U0.`cellto_id`) + U0.`cellfrom_id`) =
(`dbbackend_zjadjacent`.`source_target_cgi`)
) AS `equaladj`
FROM `dbbackend_zjadjacent`
測試結果依然很慢,仔細分析sql語句,發現sql語句類似於巢狀迴圈:父查詢是外部迴圈,子查詢是內部迴圈,父查詢每處理一行都會執行一遍子查詢,整體相當於執行了838118次子查詢,所以子查詢的語句需要儘量精簡,否則嚴重影響效率。
第二次修改,將annotate(iadj=2**32*F('cellto_id') + F('cellfrom_id'))從子查詢移出,放到父查詢中:
# Inner queryset: a plain lookup on the stored key; the reversed key it is
# compared with comes from the outer query's `iadj` annotation.
inverseadj = ZJadjacent.objects.filter(
    source_target_cgi=OuterRef('iadj')
)

# Outer queryset: first compute the reversed key (cellto * 2**32 + cellfrom)
# for each row, then flag whether a row with that key exists.
symmetricaladjsource = (
    ZJadjacent.objects
    .annotate(iadj=2**32 * F('cellto_id') + F('cellfrom_id'))
    .annotate(equaladj=Exists(inverseadj))
)
等效sql:
-- SQL for the second revision: the reversed key is now computed from the
-- OUTER row's columns and exposed as `iadj`.
SELECT
`dbbackend_zjadjacent`.`cellfrom_id`,
`dbbackend_zjadjacent`.`cellto_id`,
`dbbackend_zjadjacent`.`distance`,
`dbbackend_zjadjacent`.`azi1`,
`dbbackend_zjadjacent`.`azi2`,
`dbbackend_zjadjacent`.`has_inverse_adj`,
`dbbackend_zjadjacent`.`source_target_cgi`,
((4294967296 * `dbbackend_zjadjacent`.`cellto_id`) +
`dbbackend_zjadjacent`.`cellfrom_id`) AS `iadj`,
-- Correlated subquery: no per-row expression on U0 any more.
EXISTS(SELECT U0.`cellfrom_id`,
U0.`cellto_id`,
U0.`distance`,
U0.`azi1`,
U0.`azi2`,
U0.`has_inverse_adj`,
U0.`source_target_cgi`
FROM `dbbackend_zjadjacent` U0
WHERE
-- U0.source_target_cgi (declared primary_key=True in the ZJadjacent model)
-- is compared directly with the value computed from the outer row.
U0.`source_target_cgi` =
(((4294967296 * `dbbackend_zjadjacent`.`cellto_id`) +
`dbbackend_zjadjacent`.`cellfrom_id`))
) AS `equaladj`
FROM `dbbackend_zjadjacent`
再次測試後,效率明顯提升,執行count()函式只需等待n秒左右。
symmetricaladjsource
<QuerySet [<ZJadjacent: 358036724510097538>, <ZJadjacent: 358036724510097539>, <ZJadjacent: 358036724510099847>, <ZJadjacent: 358036724510129025>, <ZJadjacent: 358036724510131843>, <ZJadjacent: 358036724510413443>, <ZJadjacent: 358036724510439042>, <ZJadjacent: 358036724510448771>, <ZJadjacent: 358036724510570881>, <ZJadjacent: 358036724532486657>, <ZJadjacent: 358036724532486659>, <ZJadjacent: 358036724532487426>, <ZJadjacent: 358036724532487427>, <ZJadjacent: 358036724532488449>, <ZJadjacent: 358036724532488450>, <ZJadjacent: 358036724532488451>, <ZJadjacent: 358036724532489217>, <ZJadjacent: 358036724532489218>, <ZJadjacent: 358036724532494337>, <ZJadjacent: 358036724532494342>, '...(remaining elements truncated)...']>
symmetricaladjsource[:100]
<QuerySet [<ZJadjacent: 358036724510097538>, <ZJadjacent: 358036724510097539>, <ZJadjacent: 358036724510099847>, <ZJadjacent: 358036724510129025>, <ZJadjacent: 358036724510131843>, <ZJadjacent: 358036724510413443>, <ZJadjacent: 358036724510439042>, <ZJadjacent: 358036724510448771>, <ZJadjacent: 358036724510570881>, <ZJadjacent: 358036724532486657>, <ZJadjacent: 358036724532486659>, <ZJadjacent: 358036724532487426>, <ZJadjacent: 358036724532487427>, <ZJadjacent: 358036724532488449>, <ZJadjacent: 358036724532488450>, <ZJadjacent: 358036724532488451>, <ZJadjacent: 358036724532489217>, <ZJadjacent: 358036724532489218>, <ZJadjacent: 358036724532494337>, <ZJadjacent: 358036724532494342>, '...(remaining elements truncated)...']>
symmetricaladjsource[:1000]
<QuerySet [<ZJadjacent: 358036724510097538>, <ZJadjacent: 358036724510097539>, <ZJadjacent: 358036724510099847>, <ZJadjacent: 358036724510129025>, <ZJadjacent: 358036724510131843>, <ZJadjacent: 358036724510413443>, <ZJadjacent: 358036724510439042>, <ZJadjacent: 358036724510448771>, <ZJadjacent: 358036724510570881>, <ZJadjacent: 358036724532486657>, <ZJadjacent: 358036724532486659>, <ZJadjacent: 358036724532487426>, <ZJadjacent: 358036724532487427>, <ZJadjacent: 358036724532488449>, <ZJadjacent: 358036724532488450>, <ZJadjacent: 358036724532488451>, <ZJadjacent: 358036724532489217>, <ZJadjacent: 358036724532489218>, <ZJadjacent: 358036724532494337>, <ZJadjacent: 358036724532494342>, '...(remaining elements truncated)...']>
symmetricaladjsource.count()
838118
symmetricaladjsource.filter(equaladj=True)
<QuerySet [<ZJadjacent: 358036724510097538>, <ZJadjacent: 358036724510097539>, <ZJadjacent: 358036724510099847>, <ZJadjacent: 358036724510131843>, <ZJadjacent: 358036724510413443>, <ZJadjacent: 358036724510448771>, <ZJadjacent: 358036724510570881>, <ZJadjacent: 358036724532486657>, <ZJadjacent: 358036724532486659>, <ZJadjacent: 358036724532487426>, <ZJadjacent: 358036724532487427>, <ZJadjacent: 358036724532488449>, <ZJadjacent: 358036724532488450>, <ZJadjacent: 358036724532488451>, <ZJadjacent: 358036724532489217>, <ZJadjacent: 358036724532489218>, <ZJadjacent: 358036724532494337>, <ZJadjacent: 358036724532494342>, <ZJadjacent: 358036724532498690>, <ZJadjacent: 358036724532498691>, '...(remaining elements truncated)...']>
symmetricaladjsource.filter(equaladj=True).count()
711252
symmetricaladjsource.filter(equaladj=False).count()
126866
相關django官方手冊連結:
Subquery() expressions
https://docs.djangoproject.com/en/2.1/ref/models/expressions/#subquery-expressions
Exists() subqueries
https://docs.djangoproject.com/en/2.1/ref/models/expressions/#exists-subqueries
等效sql在pycharm中的位置: