1. 程式人生 > >最近點對問題的複雜度為O(n)的解法

最近點對問題的複雜度為O(n)的解法

在演算法導論中給出的方法:分治過程中同時進行歸併排序的分治法,它是分治求最短距離的時候同時對左子集和右子集進行歸併,最終複雜度為T(n) = 2T(n/2) + O(n) = O(nlogn)

然而,按照《資料結構與演算法》黑皮書280頁的描述,取d=min(d1,d2),那麼在mid_x±d區間中平均有\sqrt{n}個點

那麼我們直接對這個區間中的點進行快速排序,再遍歷排序後的點計算每一個點和最近6個點的距離並更新min就可以。

那麼快速排序需要的時間為O(\sqrt{n}log\sqrt{n}),而遍歷時間小於\sqrt{n}log\sqrt{n},則總的合併時間為O(\sqrt{n}log\sqrt{n})

所以,T(n)=2T(n/2) + O(\sqrt{n}log\sqrt{n})

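根據主定理:合併代價f(n)=O(\sqrt{n}log\sqrt{n})=O(n^{1-\epsilon})(例如取\epsilon=1/4),而n^{\log_2 2}=n,屬於主定理的第一種情況

可以得時間複雜度T(n)為\Theta(n),即O(n)(此處未計入事先按x座標排序所需的O(nlogn))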

此方法我們稱為有內部快速排序的分治法

然而它也有缺陷,在最壞情況下,它的複雜度可以達到O(n²)

相對於演算法導論中正常的無內部排序歸併分治法,上面這個方法是快很多的,缺點就是不穩定,而無內部排序歸併分治法比較穩定,兩者各有優勢

下面是程式碼實現和10萬到100萬的實測資料

其中:

force_get_min是蠻力法,平均複雜度O(n²)

dc_merge_get_min是演算法導論中給出的無內部排序歸併分治法,平均複雜度O(nlogn)

dc_sort_get_min則是上面介紹的有內部排序分治法,平均複雜度O(n)

#include <iostream>
#include <vector>
#include <algorithm>
#include <string>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#include <windows.h>

using namespace std;
// A point in the plane. A default-constructed Point sits at the origin.
struct Point
{
	double x{0.0};	// x coordinate
	double y{0.0};	// y coordinate
};
/*
 * Brute-force closest pair: examines every unordered pair among the first
 * n points of P and returns the smallest Euclidean distance found.
 *
 * P : array holding at least n points; it is only read.
 * n : number of points to consider.
 *
 * Returns HUGE_VAL when n < 2, because no pair exists (the previous code
 * read P[0] and P[1] unconditionally in that case).
 * The nested loops visit n*(n-1)/2 pairs, i.e. O(n^2) time.
 */
double force_get_min(struct Point *P, int n)
{
	if (n < 2)
		return HUGE_VAL;

	// Track the squared distance so sqrt is taken only once, at the end.
	double best = HUGE_VAL;
	for (int i = 0; i < n - 1; i++)
	{
		for (int k = i + 1; k < n; k++)
		{
			double dx = P[k].x - P[i].x;
			double dy = P[k].y - P[i].y;
			double sq = dx * dx + dy * dy;
			if (sq < best)
				best = sq;
		}
	}
	return sqrt(best);
}
void sort_x_point(struct Point *P, int left, int right)
{
	Point temp;
	if (right - left>0)
	{
		int i = left, j = right, flag = 0;
		while (i<j)
		{
			if (P[i].x>P[j].x)
			{
				temp = P[i];
				P[i] = P[j];
				P[j] = temp;
				if (flag == 0)
				{
					i++;
					flag = 1;
				}
				else
				{
					j--;
					flag = 0;
				}
			}
			else
			{
				if (flag == 0)
				{
					j--;
				}
				else
				{
					i++;
				}
			}
		}
		sort_x_point(P, left, i - 1);
		sort_x_point(P, i + 1, right);
	}
}
void sort_y_point(struct Point *P, int left, int right)
{
	Point temp;
	if (right - left>0)
	{
		int i = left, j = right, flag = 0;
		while (i<j)
		{
			if (P[i].y>P[j].y)
			{
				temp = P[i];
				P[i] = P[j];
				P[j] = temp;
				if (flag == 0)
				{
					i++;
					flag = 1;
				}
				else
				{
					j--;
					flag = 0;
				}
			}
			else
			{
				if (flag == 0)
				{
					j--;
				}
				else
				{
					i++;
				}
			}
		}
		sort_y_point(P, left, i - 1);
		sort_y_point(P, i + 1, right);
	}
}
// Returns the SQUARED Euclidean distance between P and Q; no square root
// is taken here, so callers that need the true distance apply sqrt.
double get_distance(struct Point P, struct Point Q)
{
	const double dx = P.x - Q.x;
	const double dy = P.y - Q.y;
	return dx * dx + dy * dy;
}

/*
 * Divide-and-conquer closest pair that y-sorts the right part of the
 * middle strip at every level.
 *
 * P     : points, sorted by ascending x over [left, right] on entry.
 *         The array is reordered as a side effect (right-hand strip
 *         ranges are sorted by y).
 * left  : index of the first point of the range (inclusive).
 * right : index of the last point of the range (inclusive).
 *
 * Returns the SQUARED distance of the closest pair in the range (callers
 * take sqrt), or the sentinel 1000000000000000 when the range holds fewer
 * than two points.
 *
 * All strip tests compare squared offsets against the squared minimum.
 * The previous code compared linear x/y offsets with the squared minimum,
 * which dropped valid cross pairs whenever the closest distance was < 1.
 */
double dc_sort_get_min(struct Point *P, int left, int right)
{
	// Fewer than two points: no pair. Also stops the endless recursion the
	// old code entered for left > right.
	if (left >= right)
		return 1000000000000000;
	if (right - left == 1)
		return get_distance(P[left], P[left + 1]);
	if (right - left == 2)
	{
		double t1 = get_distance(P[left], P[left + 1]);
		double t2 = get_distance(P[left + 1], P[left + 2]);
		double t3 = get_distance(P[left], P[left + 2]);
		double t = t1 < t2 ? t1 : t2;
		return t < t3 ? t : t3;
	}

	// Read the dividing x BEFORE recursing: the recursive calls reorder
	// their sub-ranges, so P[mid] may hold a different point afterwards.
	const int mid = left + (right - left) / 2;
	const double mid_x = P[mid].x;
	const int loc_i = mid + 1;	// first index of the right half

	const double left_min = dc_sort_get_min(P, left, loc_i - 1);
	const double right_min = dc_sort_get_min(P, loc_i, right);
	double mmin = left_min < right_min ? left_min : right_min;

	// Skip leading left-half points that lie outside the strip. Because the
	// sub-ranges are no longer strictly x-sorted, [left_i, loc_i) may still
	// contain some out-of-strip points; that is harmless since the real
	// distance is computed below.
	int left_i;
	for (left_i = left; left_i < loc_i; left_i++)
	{
		const double dx = mid_x - P[left_i].x;
		if (dx * dx < mmin)
			break;
	}
	// Same from the far end of the right half.
	int right_i;
	for (right_i = right; right_i >= loc_i; right_i--)
	{
		const double dx = P[right_i].x - mid_x;
		if (dx * dx < mmin)
			break;
	}

	// Order the right-hand candidates by y so the inner scan can stop early.
	sort_y_point(P, loc_i, right_i);

	for (int i = left_i; i < loc_i; i++)
	{
		for (int k = loc_i; k <= right_i; k++)
		{
			const double dy = P[k].y - P[i].y;
			if (dy * dy > mmin)
			{
				if (dy < 0)
					continue;	// k is too far below i; later k are higher
				break;			// k is too far above i; later k are higher still
			}
			const double temp_t = get_distance(P[k], P[i]);
			if (temp_t < mmin)
				mmin = temp_t;
		}
	}
	return mmin;
}

/*
 * Divide-and-conquer closest pair that merge-sorts the points by y while
 * the recursion unwinds.
 *
 * left, right : inclusive index range to process.
 * px    : the points, sorted by ascending x; only read.
 * py    : output; on return py[left..right] holds the same points sorted
 *         by ascending y.
 * ptemp : scratch buffer for the merge, indexed like py.
 * check : scratch buffer for strip candidates, filled from index 0; needs
 *         room for right - left + 1 points.
 *
 * Returns the SQUARED distance of the closest pair in the range (callers
 * take sqrt), or the sentinel 1000000000 when the range holds fewer than
 * two points.
 *
 * The strip filter compares the squared x offset with the squared
 * minimum. The previous code compared the linear x offset with the
 * squared minimum: for distances < 1 the strip was too narrow, and for
 * distances > 1 it was too wide, which invalidates the "next 7 points"
 * bound used below. Either way close pairs could be missed.
 */
double dc_merge_get_min(int left, int right, struct Point *px, struct Point *py,struct Point *ptemp,struct Point *check){
	if (left > right)
		return 1000000000;
	if (left == right)
	{
		py[left] = px[left];
		return 1000000000;
	}
	if (right - left == 1)
	{
		// Two points: store them in y order and return their distance.
		if (px[left].y > px[right].y)
		{
			py[left] = px[right];
			py[right] = px[left];
		}
		else
		{
			py[left] = px[left];
			py[right] = px[right];
		}
		return get_distance(px[left], px[right]);
	}
	if (right - left == 2)
	{
		// Three points: hand-sort them by y into py.
		// `flag` is the index of the larger-y point among the first two.
		int flag;
		if (px[left].y < px[left + 1].y)
		{
			py[left] = px[left];
			flag = left + 1;
		}
		else
		{
			py[left] = px[left + 1];
			flag = left;
		}
		if (px[left + 2].y < py[left].y)
		{
			// The third point is the lowest: shift the other two up.
			py[left + 1] = py[left];
			py[left + 2] = px[flag];
			py[left] = px[left + 2];
		}
		else if (px[flag].y < px[left + 2].y)
		{
			py[left + 1] = px[flag];
			py[left + 2] = px[left + 2];
		}
		else
		{
			py[left + 1] = px[left + 2];
			py[left + 2] = px[flag];
		}
		double x1 = get_distance(px[left], px[left + 1]);
		double x2 = get_distance(px[left], px[right]);
		double x3 = get_distance(px[left + 1], px[right]);
		double x = x1 < x2 ? x1 : x2;
		return x < x3 ? x : x3;
	}

	const int center = (left + right) / 2;
	const double a = dc_merge_get_min(left, center, px, py, ptemp, check);
	const double b = dc_merge_get_min(center + 1, right, px, py, ptemp, check);
	double min_distance = a > b ? b : a;

	const double midx = px[center].x;	// x of the dividing line

	// Merge the two y-sorted halves of py into ptemp[left..right].
	int lo = left, hi = center + 1, out = left;
	while (lo <= center && hi <= right)
	{
		if (py[lo].y <= py[hi].y)
			ptemp[out++] = py[lo++];
		else
			ptemp[out++] = py[hi++];
	}
	while (lo <= center)
		ptemp[out++] = py[lo++];
	while (hi <= right)
		ptemp[out++] = py[hi++];

	// Copy the merged run back into py and, in the same pass, collect the
	// points closer to the dividing line than the current minimum. Walking
	// from the top index down leaves `check` in descending y order.
	int checki = 0;
	for (int i = right; i >= left; i--)
	{
		py[i] = ptemp[i];
		const double dx = py[i].x - midx;
		if (dx * dx < min_distance)
			check[checki++] = py[i];
	}

	// Within a correctly sized strip, only the next 7 points in y order can
	// be closer than the current minimum.
	for (int i = 0; i < checki; i++)
	{
		for (int j = i + 1; j < i + 8 && j < checki; j++)
		{
			const double temp = get_distance(check[i], check[j]);
			min_distance = min_distance < temp ? min_distance : temp;
		}
	}
	return min_distance;
}
/*
 * Benchmark driver: for input sizes 100000, 200000, ..., 1000000 it
 * generates random points, runs both divide-and-conquer variants 20 times
 * each (including the x pre-sort) and prints the average elapsed clock()
 * ticks per run. Finally waits for a value on stdin before exiting.
 */
int main()
{
	const int num = 10;		// number of input sizes to benchmark
	const int mod = 100000;		// step between consecutive input sizes
	const int rounds = 20;		// repetitions averaged per size

	// a[k] is the k-th input size: mod, 2*mod, ..., num*mod.
	int *a = new int[num];
	for (int i = 1; i <= num; i++)
		a[i - 1] = i*mod;

	struct Point *A = new struct Point[num * mod + 1];	// input for dc_sort_get_min
	struct Point *B = new struct Point[num * mod + 1];	// input for dc_merge_get_min
	struct Point *C = new struct Point[num * mod + 1];	// y-sorted output buffer
	struct Point *Check = new struct Point[num * mod + 1];	// strip scratch buffer
	struct Point *TEMP = new struct Point[num * mod + 1];	// merge scratch buffer

	double fenzhi_get = 0, dc_get_m = 0;
	long long t1, t2, tt1 = 0, tt2 = 0;
	for (int k = 0; k<num; k++)
	{
		cout << "資料規模為:" << a[k] << endl;
		tt1 = 0;
		tt2 = 0;
		for (int b = 0; b < rounds; b++)
		{
			for (int i = 0; i < a[k]; i++)
			{
				// NOTE(review): rand() never exceeds RAND_MAX, so on typical
				// platforms this modulus has no effect and the coordinates
				// are simply in [0, RAND_MAX].
				A[i].x = rand() % 10000000000000000;
				A[i].y = rand() % 10000000000000000;
				B[i].x = A[i].x;
				B[i].y = A[i].y;
			}

			// Merge-based variant, timed together with its x pre-sort.
			t1 = clock();
			sort_x_point(B, 0, a[k] - 1);
			fenzhi_get = sqrt(dc_merge_get_min(0, a[k] - 1, B, C, TEMP, Check));
			t2 = clock();
			tt1 += t2 - t1;

			// Strip-sorting variant, timed together with its x pre-sort.
			t1 = clock();
			sort_x_point(A, 0, a[k] - 1);
			dc_get_m = sqrt(dc_sort_get_min(A, 0, a[k] - 1));
			t2 = clock();
			tt2 += t2 - t1;
		}
		cout << "無內部排序分治20次平均時間:"  << tt1 / rounds << endl;
		cout << "有內部排序分治20次平均時間:"  << tt2 / rounds << endl;
	}
	delete[] a;	// was leaked before
	delete[] A;
	delete[] B;
	delete[] C;
	delete[] Check;
	delete[] TEMP;
	cin >> t1;	// keep the console window open until input arrives
	return 0;
}

10萬到100萬規模的實測資料:

以上還是有一些考慮不足的地方,歡迎交流