參考文章:http://www.cnblogs.com/yicoder/
最近有幾個實驗需要處理大型數據:需要讀取的是一千萬個double型的數據。雖然不要求快速讀取文件數據,但是實在是無法忍受那幾十秒鐘的停頓,所以上網搜了下關於大數據的處理。
雖然可以利用scanf()提高讀取的速度,但還是有幾十秒鐘的停頓。所以在這裡選擇使用fread()讀取出所有的字符。
為了方便實驗,先寫了個函數,用來生成包含一千萬個double型數據n(0 ≤ n < 10)的數據文件。這個沒做優化,時間較久。
源碼:
/**
 * Generates the benchmark input file "data.txt".
 *
 * Writes 10,000,000 lines, each holding one pseudo-random value k/1000 with
 * k drawn uniformly from [0, 9999], i.e. a value in [0, 9.999]. The engine is
 * seeded from the current time, so every run produces a different file.
 * If the file cannot be opened, an error is printed to stderr and nothing is
 * written.
 */
void product_data ()
{
    std::uniform_int_distribution<unsigned> u(0, 9999);
    std::default_random_engine e(
        static_cast<std::default_random_engine::result_type>(std::time(nullptr)));

    // Write through a dedicated stream instead of redirecting and then closing
    // stdout, so the caller can keep printing after this function returns.
    std::ofstream out("data.txt");
    if (!out) {
        std::cerr << "product_data: cannot open data.txt for writing" << std::endl;
        return;
    }

    for (int i = 0; i < 10000000; i++) {
        // '\n' rather than endl: endl forces a flush on every one of the
        // ten million lines.
        out << static_cast<double>(u(e)) / 1000 << '\n';
    }
}
各種方法的結果對比

可見使用fread讀取整個文件要比其他兩種方法更快。
cin 比 scanf 慢的原因很清楚:流式數據處理比scanf的格式化輸入要慢。而用fread更快的原因是把所有數據當作一個字符串來讀取,一次讀入整個文件,這種方法的主要時間開銷是把字符轉換成要求的數值。
雖然用fread讀取要快得多,但是這種方法卻有很大的缺陷:buffer數組的容量不能小於文件所包含的字符總數,要不然就不能讀取出所有的數據,或許可以利用分塊處理解決這個問題。如果各位大蝦有更好的方法請指點一二。
源碼:
#include <climits>
#include <cstdio>
#include <ctime>
#include <fstream>
#include <iostream>
#include <random>
#define N 10000000
#define M 70000000
using namespace std;
/**
 * Benchmark: reads up to N doubles from "data.txt" using `cin >>`.
 *
 * stdin is redirected to the file with freopen() so that cin reads from it.
 * The elapsed time is taken with clock() (processor time), converted to
 * milliseconds and printed to stdout. Reading stops early at the first
 * value that cannot be extracted (for example at end of file).
 */
void cin_read()
{
    const std::clock_t start_time = std::clock();
    double *nums = new double [N];

    if (std::freopen("data.txt", "r", stdin) == nullptr) {
        std::cerr << "cin_read: cannot open data.txt" << std::endl;
        delete [] nums;
        return;
    }
    // Drop any sticky eof/fail state left on cin by earlier input.
    std::cin.clear();

    for (int i = 0; i < N; i++) {
        if (!(std::cin >> nums[i])) {
            break;
        }
    }
    const std::clock_t end_time = std::clock();

    // stdin is deliberately NOT fclose()d: a stream must not be used after
    // fclose(), and a later freopen() on stdin closes the old file itself.
    delete [] nums;

    // clock() counts ticks, not milliseconds; convert before printing "ms".
    const long elapsed_ms =
        static_cast<long>((end_time - start_time) * 1000.0 / CLOCKS_PER_SEC);
    std::cout << "cin_read time: " << elapsed_ms << "ms" << std::endl;
}
/**
 * Benchmark: reads up to N doubles from "data.txt" using the scanf family.
 *
 * The file is opened as its own FILE stream and parsed with fscanf("%lf"),
 * which is the same formatted-input routine scanf() applies to stdin. The
 * elapsed time is taken with clock() (processor time), converted to
 * milliseconds and printed to stdout. Reading stops early at the first
 * value that cannot be converted (for example at end of file).
 */
void scanf_read()
{
    const std::clock_t start_time = std::clock();
    double *nums = new double [N];

    // A private stream avoids re-using stdin, which may already have been
    // closed or redirected by another benchmark.
    std::FILE *fp = std::fopen("data.txt", "r");
    if (fp == nullptr) {
        std::cerr << "scanf_read: cannot open data.txt" << std::endl;
        delete [] nums;
        return;
    }

    for (int i = 0; i < N; i++) {
        // Store into slot i; the previous code wrote every value to nums[N],
        // one element past the end of the array.
        if (std::fscanf(fp, "%lf", &nums[i]) != 1) {
            break;
        }
    }
    const std::clock_t end_time = std::clock();

    std::fclose(fp);
    delete [] nums;

    // clock() counts ticks, not milliseconds; convert before printing "ms".
    const long elapsed_ms =
        static_cast<long>((end_time - start_time) * 1000.0 / CLOCKS_PER_SEC);
    std::cout << "scanf_read time: " << elapsed_ms << "ms" << std::endl;
}
/**
 * Converts a text buffer holding one decimal number per line into doubles.
 *
 * Accepted syntax per line: optional '-', digits, optional '.' followed by
 * digits. Exponent notation is not supported. Any other character (such as
 * '\r') is ignored. An empty line yields 0.
 *
 * @param buffer  characters to parse; does not need to be NUL-terminated
 * @param lenght  number of valid characters in buffer
 * @return a new[]-allocated, zero-initialised array with one slot per '\n'
 *         in the buffer plus one extra slot for a final line that has no
 *         trailing newline. The caller owns it and must delete[] it.
 */
double* transform_num(char* buffer, int lenght)
{
    // One result per newline, plus one for an unterminated last line.
    int capacity = 1;
    for (int i = 0; i < lenght; i++) {
        if (buffer[i] == '\n') {
            capacity++;
        }
    }
    double *nums = new double [capacity]();

    int k = 0;
    // All digits are accumulated as one integer-valued mantissa and divided
    // once by a power of ten, which avoids summing rounded fractions.
    double mantissa = 0.0;
    double divisor = 1.0;
    bool is_dec = false;
    bool negative = false;
    bool pending = false;   // true once the current line has contributed input

    for (int i = 0; i < lenght; i++) {
        const char c = buffer[i];
        if (c == '\n') {
            nums[k++] = (negative ? -mantissa : mantissa) / divisor;
            // Reset the per-number state so it cannot leak into the next line.
            mantissa = 0.0;
            divisor = 1.0;
            is_dec = false;
            negative = false;
            pending = false;
        }
        else if (c == '.') {
            is_dec = true;
            pending = true;
        }
        else if (c == '-') {
            negative = true;
            pending = true;
        }
        else if (c >= '0' && c <= '9') {
            mantissa = mantissa * 10 + (c - '0');
            if (is_dec) {
                divisor *= 10;
            }
            pending = true;
        }
    }
    // Final number when the buffer does not end with '\n'.
    if (pending) {
        nums[k] = (negative ? -mantissa : mantissa) / divisor;
    }
    return nums;
}

/**
 * Benchmark: loads all of "data.txt" with a single fread() call and parses it
 * with transform_num().
 *
 * The buffer is sized from the file itself, so the file may be any size that
 * fits in an int (the length type transform_num() accepts). The elapsed time
 * is taken with clock() (processor time), converted to milliseconds and
 * printed to stdout.
 */
void fread_read()
{
    const std::clock_t start_time = std::clock();

    // Binary mode keeps the byte count reported by ftell() exact; any '\r'
    // characters are skipped by transform_num().
    std::FILE *fp = std::fopen("data.txt", "rb");
    if (fp == nullptr) {
        std::cerr << "fread_read: cannot open data.txt" << std::endl;
        return;
    }

    long file_size = -1;
    if (std::fseek(fp, 0, SEEK_END) == 0) {
        file_size = std::ftell(fp);
    }
    std::rewind(fp);
    if (file_size < 0 || file_size > INT_MAX) {
        std::cerr << "fread_read: data.txt has an unsupported size" << std::endl;
        std::fclose(fp);
        return;
    }

    // Read every character, including '\n', into the buffer in one call.
    char *buffer = new char [file_size > 0 ? file_size : 1];
    const int lenght = static_cast<int>(
        std::fread(buffer, 1, static_cast<std::size_t>(file_size), fp));
    double *nums = transform_num (buffer, lenght);
    const std::clock_t end_time = std::clock();

    std::fclose(fp);
    delete [] nums;
    delete [] buffer;

    // clock() counts ticks, not milliseconds; convert before printing "ms".
    const long elapsed_ms =
        static_cast<long>((end_time - start_time) * 1000.0 / CLOCKS_PER_SEC);
    std::cout << "fread_read time: " << elapsed_ms << "ms" << std::endl;
}
/**
 * Generates the benchmark input file "data.txt".
 *
 * Writes 10,000,000 lines, each holding one pseudo-random value k/1000 with
 * k drawn uniformly from [0, 9999], i.e. a value in [0, 9.999]. The engine is
 * seeded from the current time, so every run produces a different file.
 * If the file cannot be opened, an error is printed to stderr and nothing is
 * written.
 */
void product_data ()
{
    std::uniform_int_distribution<unsigned> u(0, 9999);
    std::default_random_engine e(
        static_cast<std::default_random_engine::result_type>(std::time(nullptr)));

    // Write through a dedicated stream instead of redirecting and then closing
    // stdout, so the caller can keep printing after this function returns.
    std::ofstream out("data.txt");
    if (!out) {
        std::cerr << "product_data: cannot open data.txt for writing" << std::endl;
        return;
    }

    for (int i = 0; i < 10000000; i++) {
        // '\n' rather than endl: endl forces a flush on every one of the
        // ten million lines.
        out << static_cast<double>(u(e)) / 1000 << '\n';
    }
}
// Entry point: runs cin_read, scanf_read and fread_read in that order,
// printing a separator line before the first and after each of them.
int main()
{
    const char *const rule = "*******************************";
    void (*const benchmarks[])() = { cin_read, scanf_read, fread_read };

    std::cout << rule << std::endl;
    for (auto run : benchmarks) {
        run();
        std::cout << rule << std::endl;
    }
    return 0;
}
完。