幾大常用排序演算法編寫及正確性、效率測試
阿新 • • 發佈:2019-02-11
排序演算法寫了幾遍,總是過段時間就忘,故在此彙總下。
寫排序演算法最重要的是理解它的原理:弄清楚如何遍歷,以及遍歷終止的條件。
插入排序
從左建立有序區,將右側的值依次插入該有序區,有序區中從插入的位置開始依次後移一位;從左往右遍歷
// Insertion sort: sorts `datas` into ascending order in place.
// The prefix [0, next) is always sorted; each pass takes the element at
// `next` and inserts it in front of the first prefix element greater than it.
void InsertSort(std::vector<int>& datas)
{
    const size_t count = datas.size();
    size_t next = 1;
    while(next < count)
    {
        // Locate the first slot in the sorted prefix holding a larger value.
        size_t slot = 0;
        while(slot < next && !(datas[next] < datas[slot]))
            ++slot;

        // slot == next means the element is already in its place.
        if(slot < next)
        {
            const int moving = datas[next];
            // Shift [slot, next) one position to the right to open the slot.
            size_t hole = next;
            while(hole > slot)
            {
                datas[hole] = datas[hole-1];
                --hole;
            }
            datas[slot] = moving;
        }
        ++next;
    }
}
氣泡排序
從左到右依次比較相鄰的兩個數,若左邊較大則交換,每一輪把未排序區間的最大值冒到區間右端;未排序區間的右邊界從右往左逐步收縮
// Bubble sort: sorts `datas` into ascending order in place.
// Each pass compares adjacent pairs inside the unsorted prefix and lets the
// largest value bubble to its right end; the prefix then shrinks by one.
void BubbleSort(std::vector<int>& datas)
{
    // `unsorted` is the length of the prefix that still needs sorting.
    for(size_t unsorted = datas.size(); unsorted > 1; --unsorted)
    {
        bool swapped = false;
        for(size_t j = 0; j + 1 < unsorted; ++j)
        {
            if(datas[j] > datas[j+1])
            {
                // Swap through a temporary: the add/subtract trick overflows
                // (undefined behaviour) when the two values have a large sum.
                const int tmp = datas[j];
                datas[j] = datas[j+1];
                datas[j+1] = tmp;
                swapped = true;
            }
        }
        // A pass without any swap means the data is already sorted.
        if(!swapped)
            break;
    }
}
選擇排序
從第一個位置起,依次選擇出該位置到結束的最小或最大值,放在當前位置
// Selection sort: sorts `datas` into ascending order in place.
// For every position, the smallest value of the remaining range is located
// first and then moved into place with a single swap.
void SelectSort(std::vector<int>& datas)
{
    const size_t count = datas.size();
    for(size_t i = 0; i + 1 < count; ++i)
    {
        // Index of the smallest element seen so far in [i, count).
        size_t smallest = i;
        for(size_t j = i + 1; j < count; ++j)
        {
            if(datas[j] < datas[smallest])
                smallest = j;
        }
        // One plain swap per position instead of an XOR swap per inversion.
        if(smallest != i)
        {
            const int tmp = datas[i];
            datas[i] = datas[smallest];
            datas[smallest] = tmp;
        }
    }
}
歸併排序
兩兩比較排序;以2的倍數逐漸歸併
// Bottom-up merge sort: sorts `datas` into ascending order in place.
// Sorted runs of length 1, 2, 4, ... are merged pairwise into a scratch
// buffer and copied back until a single run covers the whole vector.
void MergeSort(std::vector<int>& datas)
{
    const size_t count = datas.size();
    if(count < 2)
        return;

    // Scratch buffer with automatic lifetime: nothing to delete by hand and
    // nothing leaks if an allocation throws.
    std::vector<int> merged(count, 0);

    // `gap` is the length of the already sorted runs.
    for(size_t gap = 1; gap < count; gap *= 2)
    {
        size_t out = 0;
        // Merge [left, left+gap) with [left+gap, left+2*gap); the right run
        // may be shorter at the end of the vector.
        for(size_t left = 0; left + gap < count; left += 2 * gap)
        {
            const size_t mid = left + gap;
            const size_t right_end = (count - mid < gap) ? count : mid + gap;
            size_t i = left;
            size_t j = mid;
            // Take the smaller head each time; ties take the left element.
            while(i < mid && j < right_end)
            {
                if(datas[j] < datas[i])
                    merged[out++] = datas[j++];
                else
                    merged[out++] = datas[i++];
            }
            while(i < mid)
                merged[out++] = datas[i++];
            while(j < right_end)
                merged[out++] = datas[j++];
        }
        // Only the first `out` elements were merged in this round. A trailing
        // run without a partner was not written to `merged`, so it must stay
        // where it is in `datas` (which is why a plain swap would be wrong).
        for(size_t k = 0; k < out; ++k)
            datas[k] = merged[k];
    }
}
堆排序
1.第一次時從含葉子節點處開始,建立一次大根堆 2.將堆頂依次與堆底交換,新的堆頂資料與較大值交換,排序lg(n)次,該堆又為有序堆
// Sift-down step of a max-heap stored in `data` with 1-based node numbers
// (node k lives at data[k-1], its children are nodes 2k and 2k+1).
// cur: node to sift down; max: number of the last node that is part of the heap.
// The node is swapped with its larger child until neither child is larger.
void HeapOrder(std::vector<int>& data, size_t cur, size_t max)
{
    for(;;)
    {
        const size_t left_child = cur * 2;
        const size_t right_child = left_child + 1;

        // Pick the largest among the node and its in-range children.
        size_t biggest = cur;
        if(left_child <= max && data[biggest-1] < data[left_child-1])
            biggest = left_child;
        if(right_child <= max && data[biggest-1] < data[right_child-1])
            biggest = right_child;

        // The node already dominates its children: heap property holds.
        if(biggest == cur)
            return;

        const int parent_value = data[cur-1];
        data[cur-1] = data[biggest-1];
        data[biggest-1] = parent_value;

        // Continue from the child that received the smaller value.
        cur = biggest;
    }
}
// Heap sort: sorts `datas` into ascending order in place.
// Phase 1 builds a max-heap; phase 2 repeatedly moves the heap top to the
// end of the unsorted region and restores the heap on what remains.
void HeapSort(std::vector<int>& datas)
{
    const size_t total = datas.size();

    // Build the heap: sift down every node that has children, last one first.
    size_t node = total / 2;
    while(node > 0)
    {
        HeapOrder(datas, node, total);
        --node;
    }

    // `heap_size` is the number of elements still inside the heap.
    size_t heap_size = total;
    while(heap_size > 1)
    {
        // The largest value goes to the last heap slot, which then leaves the heap.
        const int top = datas[0];
        datas[0] = datas[heap_size-1];
        datas[heap_size-1] = top;
        --heap_size;
        HeapOrder(datas, 1, heap_size);
    }
}
快速排序
1.從第一個位置開始,從最右往左遍歷(如果從左往右比較的結果將無效):比它大或者等於,右側值遞減;比它小交換兩個數,再從左往右遍歷
2.從左往右遍歷:比它小或者等於,左側值遞增;比它大交換兩個數,再從右往左遍歷
3.直到左右兩個數相等,當前該數左側的值小於等於它,右側的值大於等於它,這個數已排序好
4.遞迴排序它的左側,它的右側
// Quick sort of the 1-based inclusive range [low, high] of `datas`
// (position p refers to datas[p-1]); callers must pass low >= 1 and
// high <= datas.size(). The range is sorted ascending in place.
//
// The first element of the range is the pivot. `left` and `right` close in on
// each other and the pivot always sits at one of them: while it sits at
// `left` the scan moves `right` leftwards, after a swap it sits at `right`
// and the scan moves `left` rightwards. When both meet, everything before the
// pivot is <= it and everything after is >= it.
//
// Only the smaller partition is handled recursively and the larger one by the
// loop, which bounds the recursion depth by O(log n) even for sorted input.
void QuickSort(std::vector<int>& datas, size_t low, size_t high)
{
    while(low < high)
    {
        size_t left = low;
        size_t right = high;
        // true: pivot is at `left`, scanning from the right end.
        bool pivot_on_left = true;
        while(left != right)
        {
            if(datas[left-1] <= datas[right-1])
            {
                // Pair is in order: advance the cursor that is not the pivot.
                if(pivot_on_left)
                    --right;
                else
                    ++left;
            }
            else
            {
                // Out of order: swap, which moves the pivot to the other
                // cursor, then step past the element just placed.
                const int value = datas[left-1];
                datas[left-1] = datas[right-1];
                datas[right-1] = value;
                if(pivot_on_left)
                    ++left;
                else
                    --right;
                pivot_on_left = !pivot_on_left;
            }
        }

        // `left` is now the final position of the pivot.
        if(left - low < high - left)
        {
            QuickSort(datas, low, left - 1);
            low = left + 1;
        }
        else
        {
            // This branch implies left > low, so left - 1 cannot underflow.
            QuickSort(datas, left + 1, high);
            high = left - 1;
        }
    }
}
/// Quick sort entry point: sorts all of `datas` into ascending order in place.
void QuickSort(std::vector<int>& datas)
{
    // The range overload works on 1-based inclusive positions, so the whole
    // vector is the range [1, size].
    const size_t last = datas.size();
    QuickSort(datas, 1, last);
}
計數排序
分配一個以最大值為大小的計數陣列;每出現一個數,就將該數對應的計數加一;最後從小到大遍歷計數陣列,依計數把數值寫回原陣列
// Counting sort: sorts `datas` into ascending order in place.
// max: largest value that may occur; every element must lie in [0, max].
// Throws std::out_of_range (leaving `datas` untouched) if an element is
// outside that range.
// The counter array is a function-local static so its storage is reused
// between calls; as a consequence the function is not reentrant.
void CountSort(std::vector<int>& datas, int max)
{
    // One counter per value 0..max, indexed by the value itself. Indexing by
    // value-1 would write before the array for the value 0.
    const size_t slots = max < 0 ? 0 : static_cast<size_t>(max) + 1;
    static std::vector<int> counts;
    counts.assign(slots, 0);

    for(size_t i = 0; i < datas.size(); ++i)
    {
        const int value = datas[i];
        if(value < 0 || value > max)
            throw std::out_of_range("CountSort: value outside [0, max]");
        ++counts[static_cast<size_t>(value)];
    }

    // Write each value back as many times as it was counted; stop as soon as
    // every element has been placed.
    size_t write_pos = 0;
    for(size_t value = 0; value < slots && write_pos < datas.size(); ++value)
    {
        for(int repeat = counts[value]; repeat > 0; --repeat)
            datas[write_pos++] = static_cast<int>(value);
    }
}
// Counting sort with the value range fixed to [0, 1000000], which makes the
// signature match the other sort entry points used by the test code.
// Sorts `datas` into ascending order in place. Throws std::out_of_range
// (leaving `datas` untouched) if an element is outside [0, 1000000].
// The counter array is a function-local static so its storage is reused
// between calls; as a consequence the function is not reentrant.
void CountSort(std::vector<int>& datas)
{
    const int kMaxValue = 1000000;
    // One counter per value 0..kMaxValue, indexed by the value itself.
    // Indexing by value-1 would write before the array for the value 0.
    const size_t slots = static_cast<size_t>(kMaxValue) + 1;
    static std::vector<int> counts;
    counts.assign(slots, 0);

    for(size_t i = 0; i < datas.size(); ++i)
    {
        const int value = datas[i];
        if(value < 0 || value > kMaxValue)
            throw std::out_of_range("CountSort: value outside [0, 1000000]");
        ++counts[static_cast<size_t>(value)];
    }

    // Write each value back as many times as it was counted; stop as soon as
    // every element has been placed.
    size_t write_pos = 0;
    for(size_t value = 0; value < slots && write_pos < datas.size(); ++value)
    {
        for(int repeat = counts[value]; repeat > 0; --repeat)
            datas[write_pos++] = static_cast<int>(value);
    }
}
基數排序
開始分配基數個空間,當前位在哪個基數上,就新增給那個基數空間;從左到右,每次排序後將當前排序值賦值給datas;從低位到高位依次排序
// LSD radix sort: sorts `datas` into ascending order in place.
// radix: number base used to split the keys into digits; must be >= 2,
//        otherwise std::invalid_argument is thrown.
// The elements are distributed into `radix` buckets by one digit per pass,
// lowest digit first, and gathered back in bucket order after every pass.
// Digits are taken from (value - minimum), so negative values are supported.
// The buckets are a function-local static so their storage is reused between
// calls; as a consequence the function is not reentrant.
void RadixSort(std::vector<int>& datas, const int radix)
{
    if(radix < 2)
        throw std::invalid_argument("RadixSort: radix must be at least 2");
    if(datas.size() < 2)
        return;

    int smallest = datas[0];
    int largest = datas[0];
    for(size_t i = 1; i < datas.size(); ++i)
    {
        if(datas[i] < smallest)
            smallest = datas[i];
        if(datas[i] > largest)
            largest = datas[i];
    }

    // Keys are computed in 64 bits so that largest - smallest cannot overflow.
    const long long offset = smallest;
    const unsigned long long base = static_cast<unsigned long long>(radix);
    const unsigned long long max_key =
        static_cast<unsigned long long>(static_cast<long long>(largest) - offset);

    static std::vector<std::vector<int> > buckets;
    buckets.assign(static_cast<size_t>(radix), std::vector<int>());

    // `divisor` is radix^pass, kept as an exact integer instead of going
    // through floating-point std::pow for every element.
    for(unsigned long long divisor = 1; ; divisor *= base)
    {
        // Distribute by the current digit.
        for(size_t i = 0; i < datas.size(); ++i)
        {
            const unsigned long long key =
                static_cast<unsigned long long>(static_cast<long long>(datas[i]) - offset);
            buckets[static_cast<size_t>((key / divisor) % base)].push_back(datas[i]);
        }

        // Gather the buckets back in order; this keeps the pass stable.
        size_t write_pos = 0;
        for(size_t b = 0; b < buckets.size(); ++b)
        {
            for(size_t j = 0; j < buckets[b].size(); ++j)
                datas[write_pos++] = buckets[b][j];
            buckets[b].clear();
        }

        // No key has a higher non-zero digit left: done. Checking before the
        // multiplication also guarantees that `divisor` never overflows.
        if(max_key / divisor < base)
            break;
    }
}
// LSD radix sort with the radix fixed to 10, which makes the signature match
// the other sort entry points used by the test code.
// Sorts `datas` into ascending order in place: the elements are distributed
// into ten buckets by one decimal digit per pass, lowest digit first, and
// gathered back in bucket order after every pass.
// Digits are taken from (value - minimum), so negative values are supported.
// The buckets are a function-local static so their storage is reused between
// calls; as a consequence the function is not reentrant.
void RadixSort(std::vector<int>& datas)
{
    if(datas.size() < 2)
        return;

    int smallest = datas[0];
    int largest = datas[0];
    for(size_t i = 1; i < datas.size(); ++i)
    {
        if(datas[i] < smallest)
            smallest = datas[i];
        if(datas[i] > largest)
            largest = datas[i];
    }

    // Keys are computed in 64 bits so that largest - smallest cannot overflow.
    const long long offset = smallest;
    const unsigned long long base = 10;
    const unsigned long long max_key =
        static_cast<unsigned long long>(static_cast<long long>(largest) - offset);

    static std::vector<std::vector<int> > buckets;
    buckets.assign(static_cast<size_t>(base), std::vector<int>());

    // `divisor` is 10^pass, kept as an exact integer instead of going through
    // floating-point std::pow for every element.
    for(unsigned long long divisor = 1; ; divisor *= base)
    {
        // Distribute by the current digit.
        for(size_t i = 0; i < datas.size(); ++i)
        {
            const unsigned long long key =
                static_cast<unsigned long long>(static_cast<long long>(datas[i]) - offset);
            buckets[static_cast<size_t>((key / divisor) % base)].push_back(datas[i]);
        }

        // Gather the buckets back in order; this keeps the pass stable.
        size_t write_pos = 0;
        for(size_t b = 0; b < buckets.size(); ++b)
        {
            for(size_t j = 0; j < buckets[b].size(); ++j)
                datas[write_pos++] = buckets[b][j];
            buckets[b].clear();
        }

        // No key has a higher non-zero digit left: done. Checking before the
        // multiplication also guarantees that `divisor` never overflows.
        if(max_key / divisor < base)
            break;
    }
}
測試程式碼
#include <sys/time.h>
#include <time.h>

#include <algorithm>
#include <functional>
#include <iostream>
#include <limits>
#include <map>
#include <stdexcept>
#include <vector>
// Brings the std names into scope; the test code that follows refers to
// vector, cout and endl without the std:: prefix.
using namespace std;
// Pointer to a sort entry point that sorts a vector<int> in place; lets the
// timing loop in main() store the sorts in one container and call them uniformly.
typedef void (*SortFun)(std::vector<int>&);
// Prints a vector: every element followed by a comma, then a line end
// (with flush). Returns the stream so that calls can be chained.
template<class SStream>
SStream& operator << (SStream& os, const std::vector<int>& vec)
{
    typedef std::vector<int>::const_iterator Iter;
    for(Iter it = vec.begin(); it != vec.end(); ++it)
    {
        os << *it;
        os << ",";
    }
    os << std::endl;
    return os;
}
// Appends random test data to `datas`.
// count_max: upper bound for the number of elements; between 1 and count_max
//            elements are appended.
// value_max: exclusive upper bound for the values; each value is rand()%value_max.
// Uses rand(), so the result depends on the current srand() seed.
void GetData(std::vector<int>& datas, int count_max = 10000, int value_max = 1000000)
{
    // The first rand() call decides how many elements are generated.
    int remaining = rand()%count_max+1;
    while(remaining > 0)
    {
        datas.push_back(rand()%value_max);
        --remaining;
    }
}
// Checks that `datas` is in ascending (non-decreasing) order.
// On the first out-of-order pair the whole vector and the offending position
// with its two values are printed to cout and false is returned.
bool CheckSort(const std::vector<int>& datas)
{
    size_t next = 1;
    while(next < datas.size())
    {
        const size_t prev = next - 1;
        if(datas[next] < datas[prev])
        {
            std::cout << datas << std::endl;
            std::cout << "idx:" << prev << ", " << datas[prev] << ", " << datas[next] << std::endl;
            return false;
        }
        ++next;
    }
    return true;
}
// Checks whether two results are identical: same length and the same value
// at every position.
bool CheckResult(const std::vector<int>& datas1, const std::vector<int>& datas2)
{
    // vector equality compares the sizes first and then element by element.
    return datas1 == datas2;
}
int main()
{
srand(time(NULL));
//測試多少次
int test_times = 1000;
//獲取資料
vector<vector<int> *>* total_datas = new vector<vector<int> *>();
//產生1000個這樣的測試資料
for(int i = 0; i < test_times; ++i)
{
vector<int> *tmp = new vector<int>();
GetData(*tmp);
total_datas->push_back(tmp);
}
cout << "Create Data success!" << endl;
//檢驗正確性
for(size_t i = 0; i < total_datas->size(); ++i)
{
vector<int> *tmp = new vector<int>();
vector<int> *tmp1 = new vector<int>();
//前三種排序速度太慢,可先將test_times設成較小值測試正確性
//*tmp = *((*total_datas)[i]);
//InsertSort(*tmp);
//CheckSort(*tmp);
//*tmp = *((*total_datas)[i]);
//BubbleSort(*tmp);
//CheckSort(*tmp);
//*tmp = *((*total_datas)[i]);
//SelectSort(*tmp);
//CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
QuickSort(*tmp);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
HeapSort(*tmp);
//如果資料不正確,可將資料輸出後進行斷點除錯
if(!CheckSort(*tmp))
{
cout << *((*total_datas)[i]) << endl;
}
*tmp = *((*total_datas)[i]);
MergeSort(*tmp);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
CountSort(*tmp, 1000000);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
RadixSort(*tmp);
CheckSort(*tmp);
*tmp = *((*total_datas)[i]);
QuickSort(*tmp);
*tmp1 = *((*total_datas)[i]);
MergeSort(*tmp1);
//比較兩種排序的結果是否一致
if(!CheckResult(*tmp, *tmp1))
{
cout << *((*total_datas)[i]) << endl;
break;
}
delete tmp;
delete tmp1;
cout << i << endl;
}
//檢驗時間
vector<SortFun> funs(5);
funs[0] = MergeSort;
funs[1] = RadixSort;
funs[2] = QuickSort;
funs[3] = CountSort;
funs[4] = HeapSort;
for(size_t idx = 0; idx < funs.size(); ++idx)
{
struct timeval start_time, end_time;
gettimeofday(&start_time, NULL);
for(size_t i = 0; i < total_datas->size(); ++i)
{
vector<int> *tmp = new vector<int>();
*tmp = *((*total_datas)[i]);
funs[idx](*tmp);
}
gettimeofday(&end_time, NULL);
cout << "runtime:" << (end_time.tv_sec-start_time.tv_sec)*1000 + (end_time.tv_usec-start_time.tv_usec)/1000 << endl;
}
return 0;
}
結果如下:
速度依次是:快速排序>歸併排序>堆排序>基數排序>計數排序
特殊情況下基數排序和計數排序可能更快,歸併排序和堆排序效率接近相等但都小於快排,其它三種蝸牛排序忽略