1. 外排序
外排序(External sorting)是指能够处理极⼤量数据的排序算法。通常来说,外排序处理的数据不能 ⼀次装⼊内存,只能放在读写较慢的外存储器(通常是硬盘)上。外排序通常采⽤的是⼀种“排序-归并”的策略。在排序阶段,先读⼊能放在内存中的数据量,将其排序输出到⼀个临时⽂件,依此进⾏,将待排序数据组织为多个有序的临时⽂件。然后在归并阶段将这些临时⽂件组合为⼀个⼤的有序⽂件,也即排序结果。
跟外排序对应的就是内排序,之前小编讲的常⻅的排序,都是内排序,他们排序思想适应的是数据在内存中,⽀持随机访问。归并排序的思想不需要随机访问数据,只需要依次按序列读取数据,所以归并排序既是⼀个内排序,也是⼀个外排序。
2.创建随机数据文件
创建N个随机数,写到文件中,这里可以将N定义大一点,说明有大量数据。
void CreateData()
{int N = 100000;srand(time(0));const char* file = "data.txt";FILE* fin = fopen(file, "w");if (fin == NULL){perror("fopen fail");exit(-1);}for (int i = 0; i < N; i++){int x = rand() + i;fprintf(fin,"%d\n", x);}fclose(fin);
}
3.文件归并排序思路
1. 读取n个值排序后写到file1,再读取n个值排序后写到file2。
2. file1和file2利⽤归并排序的思想,依次读取⽐较,取⼩的尾插到mfile,mfile归并为⼀个有序⽂件。
3. 将file1和file2删掉,mfile重命名为file1。
4. 再次读取n个数据排序后写到file2。
5. 继续⾛file1和file2归并,重复步骤2,直到⽂件中⽆法读出数据。最后归并出的有序数据放到了 file1中。
这里用到两个函数,remove是将文件移除,rename是将传入的第1个参数的文件名更改为传入的第2个参数的文件名。
#include<stdio.h>
#include<stdlib.h>
#include<time.h>void CreateData()
{int N = 100000;srand(time(0));const char* file = "data.txt";FILE* fin = fopen(file, "w");if (fin == NULL){perror("fopen fail");exit(-1);}for (int i = 0; i < N; i++){int x = rand() + i;fprintf(fin,"%d\n", x);}fclose(fin);
}int cmp(const void* a, const void* b)
{return (*(int*)a - *(int*)b);
}int ReadNNumSortToFile(FILE* fout, int n, const char* file1)
{int x = 0;int* a = (int*)malloc(sizeof(int) * n);if (a == NULL){perror("malloc fail");return 0;}int j = 0;for (int i = 0; i < n; i++){if (fscanf(fout, "%d", &x) == EOF)break;a[j++] = x;}if (j == 0){free(a);return 0;}qsort(a, j, sizeof(int), cmp);FILE* fin = fopen(file1, "w");if (fin == NULL){free(a);perror("fopen fail");return 0;}for (int i = 0; i < j; i++){fprintf(fin, "%d\n", a[i]);}free(a);fclose(fin);return j;}void MergeFile(const char*file1, const char* file2, const char* mfile)
{FILE* fout1 = fopen(file1, "r");if (fout1 == NULL){perror("fopen fail");return;}FILE* fout2 = fopen(file2, "r");if (fout2 == NULL){perror("fopen fail");return;}FILE* mfin = fopen(mfile, "w");if (mfin == NULL){perror("fopen fail");return;}int x1 = 0;int x2 = 0;int ret1 = fscanf(fout1, "%d",&x1);int ret2 = fscanf(fout2, "%d",&x2);while (ret1 != EOF && ret2 != EOF){if (x1 < x2){fprintf(mfin, "%d\n", x1);ret1 = fscanf(fout1, "%d", &x1);}else{fprintf(mfin, "%d\n", x2);ret2 = fscanf(fout2, "%d", &x2);}while (ret1 != EOF){fprintf(mfin, "%d\n", x1);ret1 = fscanf(fout1, "%d", &x1);}while (ret2 != EOF){fprintf(mfin, "%d\n", x2);ret2 = fscanf(fout2, "%d", &x2);}}fclose(fout1);fclose(fout2);fclose(mfin);
}int main()
{ CreateData();const char* file1 = "file1.txt";const char* file2 = "file2.txt";const char* mfile = "mfile.txt";FILE* fout = fopen("data.txt", "r");if (fout == NULL){perror("fopen fail");exit(-1);}int m = 10000;ReadNNumSortToFile(fout,m,file1);ReadNNumSortToFile(fout,m,file2);while (1){MergeFile(file1,file2,mfile);remove(file1);remove(file2);rename(mfile, file1);int n = 0;if ((n =ReadNNumSortToFile(fout, m, file2))==0){break;}}return 0;
}