??本文全部代码已上传Gitee。
??之前我们介绍的排序算法都是一种“内排序”的算法,也就是把数据搞到内存中排序,但是如果数据量太大,无法一次性弄到内存中怎么办呢?这种情况下的排序称为外排序。
??我们这里可以借助归并的思想进行外排序,首先,我们把这个超大的数据分成N份,使得每份文件单独都可以读到内存中来。
??然后我们在内存中对读进来的数据(通常放在一个数组中,数据过大,建议使用malloc在堆上分配内存),使用一个性能不错的排序(如快速排序或堆排序,这里不能用归并排序,因为归并排序需要额外的空间,数据量太大可能不够空间给归并排序开额外的空间),然后把数据写回文件。
void subsort()
{
FILE* pf = fopen("testdata.txt", "r");
if (pf == NULL)
{
perror("fopen");
exit(-1);
}
int n = M;
int* a = (int*)malloc(sizeof(int) * M);
if (a == NULL)
{
printf("malloc fail\n");
exit(-1);
}
int num = 0;
int i = 0;
char subfile[20];
int filei = 0;
while (fscanf(pf, "%d\n", &num) != EOF)
{
if (i < n - 1)
{
a[i++] = num;
}
else
{
a[i] = num;
QuickSort(a, 0, n - 1);
sprintf(subfile, "sub\\sub_sort%d.txt", filei++);
FILE* psubf = fopen(subfile, "w");
if (psubf == NULL)
{
perror("fopen");
exit(-1);
}
for (int i = 0; i < n; i++)
{
fprintf(psubf, "%d\n", a[i]);
}
fclose(psubf);
i = 0;
}
}
fclose(pf);
free(a);
}
??然后进行归并:文件0和文件1进行归并,结果写到文件12,然后文件12和文件3进行归并,结果写到文件123中。。。直到所有部分文件都被处理结束为止。
??对文件的归并操作和对数组的归并很像,如归并文件1和文件2,使用fscanf读取格式字符串,放到num1和num2中,然后比较num1和num2大小,如果num1更小,就把num1写到归并结果文件中,然后使用fscanf再读取一个文件1中的格式化字符串放到num1中;反之对num2和文件2执行同样操作,直到一个文件读取结束fscanf返回EOF为止。
??然后继续fscanf检查文件1和文件2,如果fscanf的返回值不是EOF,说明这个文件没读取完,然后把没读取完的文件继续读取写到归并结果文件中。
void _MergefileSort(char* file1, char* file2, char* mfile)
{
FILE* pf1 = fopen(file1, "r");
FILE* pf2 = fopen(file2, "r");
FILE* pmf = fopen(mfile, "w");
int num1;
int num2;
int ret1 = fscanf(pf1, "%d\n", &num1);
int ret2 = fscanf(pf2, "%d\n", &num2);
while (ret1 != EOF && ret2 != EOF)
{
if (num1 < num2)
{
fprintf(pmf, "%d\n", num1);
ret1 = fscanf(pf1, "%d\n", &num1);
}
else
{
fprintf(pmf, "%d\n", num2);
ret2 = fscanf(pf2, "%d\n", &num2);
}
}
while (ret1 != EOF)
{
fprintf(pmf, "%d\n", num1);
ret1 = fscanf(pf1, "%d\n", &num1);
}
while (ret2 != EOF)
{
fprintf(pmf, "%d\n", num2);
ret2 = fscanf(pf2, "%d\n", &num2);
}
fclose(pf1);
fclose(pf2);
fclose(pmf);
}
void MergefileSort()
{
char file1[100] = "sub\\sub_sort0.txt";
char file2[100];
char mfile[100];
for (int i = 1; i < N; i++)
{
sprintf(file2, "sub\\sub_sort%d.txt", i);
if (i == N - 1)
{
sprintf(mfile, "ret.txt");
}
else
{
sprintf(mfile,
"merge\\sub_merge%d%d.txt", i - 1, i);
}
_MergefileSort(file1, file2, mfile);
strcpy(file1, mfile);
}
}
汇总:
#define _CRT_SECURE_NO_WARNINGS 1
#define M 10
#define N 10
#include <stdlib.h>
#include <stdio.h>
#include <time.h>
#include <string.h>
void makeTestdata()
{
FILE* ptdf = fopen("testdata.txt", "a");
if (ptdf == NULL)
{
perror("fopen");
return -1;
}
int a;
for (int i = 0; i < M; i++)
{
a = rand();
fprintf(ptdf, "%d\n", a);
}
fclose(ptdf);
}
void inittestdata()
{
FILE* ptdf = fopen("testdata.txt", "w");
if (ptdf == NULL)
{
perror("fopen");
exit(-1);
}
srand((time(NULL)));
int a;
for (int i = 0; i < M; i++)
{
a = rand();
fprintf(ptdf, "%d\n", a);
}
fclose(ptdf);
}
void testdatamake()
{
for (int i = 0; i < N; i++)
{
if (i == 0)
{
inittestdata();
}
else
{
makeTestdata();
}
}
}
void swap(int* x, int* y)
{
if (*x == *y)
{
return;
}
*x = (*x) ^ (*y);
*y = (*x) ^ (*y);
*x = (*x) ^ (*y);
}
int Getmidindex(int* a, int left, int right)
{
int mid = left + (right - left) / 2;
if (a[mid] > a[left])
{
if (a[right] > a[mid])
{
return mid;
}
else if (a[right] > a[left])
{
return right;
}
else
{
return left;
}
}
else
{
if (a[right] > a[left])
{
return left;
}
else if (a[right] > a[mid])
{
return right;
}
else
{
return mid;
}
}
}
int PartSort(int* a, int left, int right)
{
int midi = Getmidindex(a, left, right);
swap(&a[left], &a[midi]);
int key = a[left];
int prev = left;
int cur = prev + 1;
while (cur <= right)
{
if (a[cur] < key && ++prev != cur)
{
swap(&a[cur], &a[prev]);
}
cur++;
}
swap(&a[left], &a[prev]);
return prev;
}
void QuickSort(int* a, int left, int right)
{
if (left >= right)
return;
int keyi = PartSort(a, left, right);
QuickSort(a, left, keyi - 1);
QuickSort(a, keyi + 1, right);
}
void subsort()
{
FILE* pf = fopen("testdata.txt", "r");
if (pf == NULL)
{
perror("fopen");
exit(-1);
}
int n = M;
int* a = (int*)malloc(sizeof(int) * M);
if (a == NULL)
{
printf("malloc fail\n");
exit(-1);
}
int num = 0;
int i = 0;
char subfile[20];
int filei = 0;
while (fscanf(pf, "%d\n", &num) != EOF)
{
if (i < n - 1)
{
a[i++] = num;
}
else
{
a[i] = num;
QuickSort(a, 0, n - 1);
sprintf(subfile, "sub\\sub_sort%d.txt", filei++);
FILE* psubf = fopen(subfile, "w");
if (psubf == NULL)
{
perror("fopen");
exit(-1);
}
for (int i = 0; i < n; i++)
{
fprintf(psubf, "%d\n", a[i]);
}
fclose(psubf);
i = 0;
}
}
fclose(pf);
free(a);
}
void _MergefileSort(char* file1, char* file2, char* mfile)
{
FILE* pf1 = fopen(file1, "r");
FILE* pf2 = fopen(file2, "r");
FILE* pmf = fopen(mfile, "w");
int num1;
int num2;
int ret1 = fscanf(pf1, "%d\n", &num1);
int ret2 = fscanf(pf2, "%d\n", &num2);
while (ret1 != EOF && ret2 != EOF)
{
if (num1 < num2)
{
fprintf(pmf, "%d\n", num1);
ret1 = fscanf(pf1, "%d\n", &num1);
}
else
{
fprintf(pmf, "%d\n", num2);
ret2 = fscanf(pf2, "%d\n", &num2);
}
}
while (ret1 != EOF)
{
fprintf(pmf, "%d\n", num1);
ret1 = fscanf(pf1, "%d\n", &num1);
}
while (ret2 != EOF)
{
fprintf(pmf, "%d\n", num2);
ret2 = fscanf(pf2, "%d\n", &num2);
}
fclose(pf1);
fclose(pf2);
fclose(pmf);
}
void MergefileSort()
{
char file1[100] = "sub\\sub_sort0.txt";
char file2[100];
char mfile[100];
for (int i = 1; i < N; i++)
{
sprintf(file2, "sub\\sub_sort%d.txt", i);
if (i == N - 1)
{
sprintf(mfile, "ret.txt");
}
else
{
sprintf(mfile,
"merge\\sub_merge%d%d.txt", i - 1, i);
}
_MergefileSort(file1, file2, mfile);
strcpy(file1, mfile);
}
}
int main()
{
testdatamake();
subsort();
int begin1 = clock();
MergefileSort();
int end1 = clock();
printf("排序%d个数据用时:%dms\n", M * N, end1 - begin1);
return 0;
}
|