海量不重複資料的生成
阿新 • • 發佈:2019-01-09
前幾天看到了一個專案需求,自己嘗試寫了一下:生成3億行資料大概需要20分鐘。普通硬碟應該會慢一些,記憶體對映或者記憶體盤應該會更快一點,不過懶得驗證了。放出程式碼供大家學習,演算法是以前偶然看到的一個很巧妙的演算法;二次獲取隨機數是為了讓資料更隨機(待驗證,我覺得不需要)。
#include "stdafx.h"

#include <chrono>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <exception>
#include <iostream>
#include <limits>
#include <memory>
#include <numeric>
#include <random>
#include <vector>

#include <strsafe.h>

namespace {

// Number of distinct values (one per output line) to generate.
constexpr std::int64_t kValueCount = 300000000;

// Smallest generated value; the pool is [kFirstValue, kFirstValue + kValueCount).
constexpr int kFirstValue = 283823423;

// The pool stores plain ints, so the largest value must be representable.
static_assert(kValueCount > 0 &&
                  kValueCount - 1 <= std::numeric_limits<int>::max() - kFirstValue,
              "largest generated value must fit in int");

// Closes a C stream when the owning unique_ptr goes out of scope, so the
// file is not leaked when an exception (e.g. std::bad_alloc) unwinds.
struct FileCloser {
    void operator()(std::FILE* file) const noexcept
    {
        if (file != nullptr) {
            std::fclose(file);
        }
    }
};
using FilePtr = std::unique_ptr<std::FILE, FileCloser>;

// Writes every value of the pool exactly once, in random order, one
// zero-padded 10-digit number per line. Returns false on a write error.
//
// Selection without replacement: pick a random slot among the values that
// are still unused, emit it, then overwrite that slot with the last unused
// value and shrink the unused range by one.
bool WriteShuffledValues(std::FILE* file)
{
    std::vector<int> pool(static_cast<std::size_t>(kValueCount));
    std::iota(pool.begin(), pool.end(), kFirstValue);

    // Seed one 64-bit engine once. Querying std::random_device and
    // re-seeding on every iteration is slow and adds no randomness.
    std::random_device entropy;
    std::seed_seq seed{entropy(), entropy(), entropy(), entropy()};
    std::mt19937_64 engine(seed);

    using Distribution = std::uniform_int_distribution<std::size_t>;
    Distribution pick;

    for (std::size_t remaining = pool.size(); remaining > 0; --remaining) {
        const std::size_t last = remaining - 1;
        // The index must be able to reach every unused slot; rand() cannot,
        // because RAND_MAX may be as small as 32767.
        const std::size_t chosen = pick(engine, Distribution::param_type(0, last));

        // Narrow formatting straight to the stream: independent of whether
        // TCHAR is char or wchar_t, so Unicode builds write complete lines.
        if (std::fprintf(file, "%010d\n", pool[chosen]) < 0) {
            return false;
        }
        pool[chosen] = pool[last];
    }
    return true;
}

} // namespace

/// Returns a pseudo-random integer in the half-open range [min, max) that is
/// fully determined by `seed` (same arguments always give the same result).
///
/// Unlike `rand() % (max - min) + min`, the result can reach every value of
/// the range even when the range is wider than RAND_MAX. When the range is
/// empty (`max <= min`) the function returns `min` instead of dividing by
/// zero.
///
/// No longer called by main(), which draws from a single engine instead of
/// re-seeding per value; retained so the original helper remains available.
int GetRandomNum(int min, int max, int seed)
{
    if (max <= min) {
        return min;
    }
    std::mt19937 engine(static_cast<std::mt19937::result_type>(seed));
    return std::uniform_int_distribution<int>(min, max - 1)(engine);
}

/// Generates kValueCount unique numbers in random order into D:\num.txt and
/// prints the elapsed time in seconds.
///
/// Returns EXIT_SUCCESS when the whole file was written, EXIT_FAILURE when
/// the file could not be opened, written or flushed, or when the number pool
/// (about 1.2 GB) could not be allocated.
int main()
{
    const auto startTime = std::chrono::steady_clock::now();
    int exitCode = EXIT_FAILURE;

    try {
        std::FILE* rawFile = nullptr;
        if (_tfopen_s(&rawFile, _T("D:\\num.txt"), _T("wt")) != 0 || rawFile == nullptr) {
            std::cerr << "cannot open D:\\num.txt for writing" << std::endl;
        } else {
            FilePtr file(rawFile);

            // A larger stream buffer cuts the number of OS writes for the
            // hundreds of millions of short lines. Must precede any output.
            std::setvbuf(file.get(), nullptr, _IOFBF, 1 << 20);

            const bool written = WriteShuffledValues(file.get());

            // Close explicitly so a failed final flush is detected.
            const bool closed = std::fclose(file.release()) == 0;

            if (written && closed) {
                exitCode = EXIT_SUCCESS;
            } else {
                std::cerr << "error while writing D:\\num.txt" << std::endl;
            }
        }
    } catch (const std::exception& error) {
        std::cerr << "generation failed: " << error.what() << std::endl;
    } catch (...) {
        std::cerr << "generation failed: unknown error" << std::endl;
    }

    const auto endTime = std::chrono::steady_clock::now();
    std::cout << std::chrono::duration_cast<std::chrono::seconds>(endTime - startTime).count()
              << std::endl;
    return exitCode;
}