最近點對問題的複雜度為O(n)的解法
阿新 • • 發佈:2018-11-09
在演算法導論中給出的方法:分治過程中同時進行歸併排序的分治法,它在分治求最短距離的同時,對左子集和右子集按y座標進行歸併,最終複雜度為 $T(n) = 2T(n/2) + O(n) = O(n\log n)$
然而,按照《資料結構與演算法》黑皮書280頁的描述,取 $d=\min(d_1,d_2)$,那麼在 $mid\_x \pm d$ 區間中平均有 $O(\sqrt{n})$ 個點
那麼我們直接對這個區間中的點進行快速排序,再遍歷排序後的點計算每一個點和最近6個點的距離並更新min就可以。
那麼快速排序需要的時間為 $O(\sqrt{n}\log\sqrt{n})$,而遍歷時間為 $O(\sqrt{n})$,小於排序時間,則總的合併時間為 $O(\sqrt{n}\log n)$
所以,$T(n) = 2T(n/2) + O(\sqrt{n}\log n)$
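根據主定理:這裡 $a=2$、$b=2$,$n^{\log_b a}=n$,而 $f(n)=O(\sqrt{n}\log n)=O(n^{1-\varepsilon})$(例如取 $\varepsilon=1/4$),屬於主定理的第一種情況,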
可以得分治部分的平均時間複雜度 $T(n)$ 為 $O(n)$(不包含事先按x座標排序所需的 $O(n\log n)$ 時間)
此方法我們稱為有內部快速排序的分治法
然而它也有缺陷,在最壞情況下,它的複雜度可以達到O(n²)
相對於演算法導論中正常的無內部排序歸併分治法,上面這個方法是快很多的,缺點就是不穩定,而無內部排序歸併分治法比較穩定,兩者各有優勢
下面是程式碼實現和10萬到100萬的實測資料
其中:
force_get_min是蠻力法,平均複雜度O(n²)
dc_merge_get_min是演算法導論中給出的無內部排序歸併分治法,平均複雜度O(nlogn)
dc_sort_get_min則是上面介紹的有內部排序分治法,平均複雜度O(n)
#include <iostream>
#include <vector>
#include <algorithm>
#include <random>
#include <string>
#include <stdlib.h>
#include <math.h>
#include <time.h>
// Nothing below needs the Windows API; the header is kept for Windows builds
// only so that the program also compiles on other platforms.
#ifdef _WIN32
#include <windows.h>
#endif

using namespace std;

// A point in the plane.
struct Point {
    double x = 0;
    double y = 0;
};

// Squared distance reported when a range holds fewer than two points, i.e.
// when no pair exists. Infinity can never be mistaken for a real distance.
static const double NO_PAIR = HUGE_VAL;

// Returns the SQUARED Euclidean distance between P and Q.
double get_distance(struct Point P, struct Point Q) {
    return (P.x - Q.x) * (P.x - Q.x) + (P.y - Q.y) * (P.y - Q.y);
}

// Ordering predicates used by the sorts and the merge.
static bool less_by_x(const Point &a, const Point &b) { return a.x < b.x; }
static bool less_by_y(const Point &a, const Point &b) { return a.y < b.y; }

// Smallest squared distance among P[left..right] by checking every pair.
// Returns NO_PAIR when the range holds fewer than two points.
static double brute_force_squared(const Point *P, int left, int right) {
    double best = NO_PAIR;
    for (int i = left; i < right; i++) {
        for (int k = i + 1; k <= right; k++) {
            const double t = get_distance(P[i], P[k]);
            if (t < best) best = t;
        }
    }
    return best;
}

// True when a point with abscissa x lies strictly closer than sqrt(best_sq)
// to the vertical line x = mid_x. Both sides of the comparison are squared,
// so no square root is needed.
static bool in_strip(double x, double mid_x, double best_sq) {
    const double dx = x - mid_x;
    return dx * dx < best_sq;
}

// Brute-force closest pair.
// Returns the (non-squared) distance between the two closest points of
// P[0..n-1], or NO_PAIR (infinity) when n < 2.
double force_get_min(struct Point *P, int n) {
    if (n < 2) return NO_PAIR;
    return sqrt(brute_force_squared(P, 0, n - 1));
}

// Sorts P[left..right] (inclusive bounds) by ascending x.
void sort_x_point(struct Point *P, int left, int right) {
    if (right > left) std::sort(P + left, P + right + 1, less_by_x);
}

// Sorts P[left..right] (inclusive bounds) by ascending y.
void sort_y_point(struct Point *P, int left, int right) {
    if (right > left) std::sort(P + left, P + right + 1, less_by_y);
}

// Divide and conquer with an internal sort of the strip.
//
// P[left..right] must be sorted by ascending x on entry. The function
// reorders parts of that range (strip points of right halves get sorted by y).
// Returns the SQUARED distance of the closest pair in the range, or NO_PAIR
// when the range holds fewer than two points.
double dc_sort_get_min(struct Point *P, int left, int right) {
    if (right - left <= 2) return brute_force_squared(P, left, right);

    const int mid = left + (right - left) / 2;
    const int right_begin = mid + 1;
    // Read the dividing abscissa now: the recursive calls reorder the halves.
    const double mid_x = P[mid].x;

    const double left_best = dc_sort_get_min(P, left, mid);
    const double right_best = dc_sort_get_min(P, right_begin, right);
    double best = left_best < right_best ? left_best : right_best;

    // Drop leading left-half points and trailing right-half points that are
    // verified to lie outside the strip. What remains is a superset of the
    // strip, which keeps the result correct even though the halves are no
    // longer strictly x-sorted after recursion.
    int left_i = left;
    while (left_i < right_begin && !in_strip(P[left_i].x, mid_x, best)) left_i++;
    int right_i = right;
    while (right_i >= right_begin && !in_strip(P[right_i].x, mid_x, best)) right_i--;

    sort_y_point(P, right_begin, right_i);

    for (int i = left_i; i < right_begin; i++) {
        for (int k = right_begin; k <= right_i; k++) {
            const double dy = P[k].y - P[i].y;
            if (dy * dy >= best) {
                if (dy < 0) continue;  // still too far below P[i]
                break;                 // too far above; later points are higher still
            }
            const double t = get_distance(P[k], P[i]);
            if (t < best) best = t;
        }
    }
    return best;
}

// Divide and conquer that merges by y while recursing.
//
// px[left..right] must be sorted by ascending x and is not modified.
// On return py[left..right] holds the same points sorted by ascending y.
// ptemp and check are scratch buffers; indices left..right of ptemp and up to
// (right - left + 1) leading entries of check are overwritten.
// Returns the SQUARED distance of the closest pair in the range, or NO_PAIR
// when the range holds fewer than two points.
double dc_merge_get_min(int left, int right, struct Point *px, struct Point *py,
                        struct Point *ptemp, struct Point *check) {
    if (left > right) return NO_PAIR;

    if (right - left <= 2) {
        std::copy(px + left, px + right + 1, py + left);
        std::sort(py + left, py + right + 1, less_by_y);
        return brute_force_squared(px, left, right);
    }

    const int center = left + (right - left) / 2;
    const double a = dc_merge_get_min(left, center, px, py, ptemp, check);
    const double b = dc_merge_get_min(center + 1, right, px, py, ptemp, check);
    double best = a > b ? b : a;
    const double midx = px[center].x;

    // Merge the two y-sorted halves (ties favour the left half).
    std::merge(py + left, py + center + 1,
               py + center + 1, py + right + 1,
               ptemp + left, less_by_y);
    std::copy(ptemp + left, ptemp + right + 1, py + left);

    // Collect the strip around the dividing line, still in ascending y order.
    int strip_size = 0;
    for (int i = left; i <= right; i++) {
        if (in_strip(py[i].x, midx, best)) check[strip_size++] = py[i];
    }

    // Compare each strip point with the following ones until the vertical gap
    // alone rules out an improvement.
    for (int i = 0; i < strip_size; i++) {
        for (int j = i + 1; j < strip_size; j++) {
            const double dy = check[j].y - check[i].y;
            if (dy * dy >= best) break;
            const double t = get_distance(check[i], check[j]);
            if (t < best) best = t;
        }
    }
    return best;
}

// Benchmark driver: for sizes 100000, 200000, ..., 1000000 it generates
// random points 20 times, runs both divide-and-conquer variants (each timing
// includes its own x-sort) and prints the average elapsed clock() ticks.
int main() {
    const int num = 10;
    const int mod = 100000;
    const int rounds = 20;
    const long long coord_limit = 10000000000000000LL;

    vector<int> sizes(num);
    for (int i = 1; i <= num; i++) sizes[i - 1] = i * mod;

    const size_t capacity = static_cast<size_t>(num) * mod + 1;
    vector<Point> A(capacity), B(capacity), C(capacity), Check(capacity), TEMP(capacity);

    // rand() only yields values up to RAND_MAX (possibly just 32767), which
    // floods large inputs with duplicate points. Draw from the full intended
    // coordinate range instead; the default seed keeps runs reproducible.
    mt19937_64 rng;
    uniform_int_distribution<long long> coord(0, coord_limit - 1);

    for (int k = 0; k < num; k++) {
        const int n = sizes[k];
        cout << "資料規模為:" << n << endl;
        long long tt1 = 0, tt2 = 0;

        for (int b = 0; b < rounds; b++) {
            for (int i = 0; i < n; i++) {
                A[i].x = static_cast<double>(coord(rng));
                A[i].y = static_cast<double>(coord(rng));
                B[i] = A[i];
            }

            clock_t t1 = clock();
            sort_x_point(B.data(), 0, n - 1);
            const double fenzhi_get =
                sqrt(dc_merge_get_min(0, n - 1, B.data(), C.data(), TEMP.data(), Check.data()));
            clock_t t2 = clock();
            tt1 += t2 - t1;

            t1 = clock();
            sort_x_point(A.data(), 0, n - 1);
            const double dc_get_m = sqrt(dc_sort_get_min(A.data(), 0, n - 1));
            t2 = clock();
            tt2 += t2 - t1;

            // Both variants must agree; a mismatch points at an algorithm bug.
            if (fenzhi_get != dc_get_m) {
                cerr << "warning: results differ (merge=" << fenzhi_get
                     << ", sort=" << dc_get_m << ")" << endl;
            }
        }

        cout << "無內部排序分治20次平均時間:" << tt1 / rounds << endl;
        cout << "有內部排序分治20次平均時間:" << tt2 / rounds << endl;
    }

    // Wait for input so a console window stays open after the benchmark.
    long long pause_token = 0;
    cin >> pause_token;
    return 0;
}
10萬到100萬規模的實測資料:
以上還是有一些考慮不足地方的,歡迎交流