在VS2022中,实现多路归并外排序。
实现程序大致分为以下几个部分:
- 声明结构体及相关操作;结构体包含键和数据两部分,可以按键值从小到大排序;
- 创建三个列表,写入数据后存储到三个不同的文件中;
- 将文件中的数据关键字读取到内存中,完成多路归并排序(直接写入文件)。
实现如下:在VS2022中新建空项目,添加main.cpp文件。输入如下代码:
#include <afx.h>
#include <iostream>
#include <fstream>
#include <vector>
#include <sstream>
#include <direct.h> // c++ mkdir所在文件
#include <algorithm>
#pragma pack (push)
#pragma pack (1)
// 数据结构定义
typedef struct MyInfo
{
unsigned int key;
int data;
}MyInfo;
// 数据在文件中的特征
typedef struct MyInfoFeature{
unsigned int key; // 关键字
_int64 pos; // 在文件中位置
int fileNo; // 在打开文件列表中的编号
}MyInfoFeature;
#pragma pack (pop)
#define DATA_DIR "d:\\MyInfoData\\"
#define SORTED_DIR "d:\\MySortedData\\"
typedef std::vector<MyInfo> MyInfoVector; // 信息数组(列表)类.
typedef std::vector<MyInfoVector> MyInfoVectors; // 信息数组(列表)集合类.
typedef std::vector<MyInfoFeature> MyInfoFeatureVector; // 信息特征数组(列表)类.
typedef std::vector<MyInfoFeatureVector> MyInfoFeatureVectors; // 信息特征数组(列表)集合类.
void showMyInfo(MyInfo myInfo)
{
std::cout << "key: " << myInfo.key << ". data: " << myInfo.data << std::endl;
}
void showMyInfoVector(MyInfoVector& myInfoVector)
{
for (MyInfoVector::iterator it = myInfoVector.begin(); it != myInfoVector.end(); it++)
showMyInfo(*it);
}
void showMyInfoFeature(MyInfoFeature f)
{
std::cout << "key: " << f.key << ". pos: " << f.pos << ". fileNo: " << f.fileNo << std::endl;
}
void showMyInfoFeatureVector(MyInfoFeatureVector& fv)
{
for (MyInfoFeatureVector::iterator it = fv.begin(); it != fv.end(); it++)
showMyInfoFeature(*it);
}
// 比较两个特征大小的函数
bool lessThan(MyInfoFeature& f1, MyInfoFeature& f2)
{
return f1.key < f2.key;
}
// 声明并设置三个信息数组(列表).
MyInfoVector myInfoVector1, myInfoVector2, myInfoVector3;
void setMyInfoVectors()
{
MyInfo myInfo = {0};
{// 设置myInfoVector1
MyInfoVector& myInfoVector = myInfoVector1;
myInfo.key = 7; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 4; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 1; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
// 显示设置的内容.
std::cout << "myInfoVector1: " << std::endl;
showMyInfoVector(myInfoVector1);
}
{// 设置myInfoVector2
MyInfoVector& myInfoVector = myInfoVector2;
myInfo.key = 11; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 8; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 5; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 2; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
// 显示设置的内容.
std::cout << "myInfoVector2: " << std::endl;
showMyInfoVector(myInfoVector2);
}
{// 设置myInfoVector3
MyInfoVector& myInfoVector = myInfoVector3;
myInfo.key = 12; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 9; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 6; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
myInfo.key = 3; myInfo.data = myInfo.key * 10; myInfoVector.push_back(myInfo);
// 显示设置的内容.
std::cout << "myInfoVector3: " << std::endl;
showMyInfoVector(myInfoVector3);
}
}
// 将三个信息数组写入不同的文件中. 2022.4.18
int writeMyInfoVectors()
{
MyInfoVectors myInfoVectors;
myInfoVectors.push_back(myInfoVector1);
myInfoVectors.push_back(myInfoVector2);
myInfoVectors.push_back(myInfoVector3);
int i = 1;
std::stringstream ss;
std::ofstream ofs; // 输出文件
std::string fn; // 文件名
for (MyInfoVectors::iterator it = myInfoVectors.begin();
it != myInfoVectors.end();
it++)
{
ss.str(""); // 清空ss的字符串.
ss << DATA_DIR << "MyInfoData" << i++ << ".dat";
fn = ss.str();
ofs.open(fn, std::ios::binary | std::ios::out | std::ios::trunc); // 创建文件
if (ofs.bad()) return -1;
for (MyInfoVector::iterator itV = it->begin(); itV != it->end(); itV++) {
MyInfo& myInfo = *itV;
ofs.write((char*)&myInfo, sizeof(myInfo));
}
ofs.close(); // 关闭文件
}
return 0;
}
std::vector<std::string> openedFileNames; // 打开文件名列表
std::vector<std::ifstream*> ifsVector; // 打开输入文件列表
MyInfoFeatureVectors myInfoFeatureVectors;
// 多路归并排序.
// 将创建的文件中的数据读入myInfoFeatureVectors中.
// 对各路排序后,进行归并排序.
void mergeSort()
{
CFileFind finder;
CString strDir = DATA_DIR;
for (bool bWorking = finder.FindFile(strDir + "*.dat"); bWorking;) {
bWorking = finder.FindNextFile();
std::string fileName = finder.GetFilePath().GetBuffer();
std::ifstream* ifs = new std::ifstream;
std::cout << "正在打开文件" << fileName << std::endl;
ifs->open(fileName, std::ios::binary | std::ios::in);
if (ifs->bad()) {
delete ifs;
std::cout << fileName << "打开失败" << std::endl;
continue;
}else
std::cout << fileName << "打开成功!" << std::endl;
openedFileNames.push_back(fileName);
ifsVector.push_back(ifs);
}
MyInfo myInfo;
MyInfoFeature myInfoFeature;
MyInfoFeatureVectors myInfoFeatureVectors;
for (int i = 0; i < ifsVector.size(); i++) {
MyInfoFeatureVector myInfoFeatureVector;
std::ifstream& ifs = *ifsVector[i];
// 获取文件大小
std::streampos fSize = 0;
ifs.seekg(0, std::ios::end);
fSize = ifs.tellg();
std::cout << "已打开文件" << openedFileNames[i] << "的大小为: " << fSize << "字节." << std::endl;
// 读取并提取特征数据
for (ifs.seekg(0, std::ios::beg); ifs.tellg() < fSize; ) {
myInfoFeature.pos = ifs.tellg(); //
myInfoFeature.fileNo = i;
ifs.read((char*)&myInfo, sizeof(myInfo)); // 读取myInfo
// 提取特征.
myInfoFeature.key = myInfo.key;
// 加入数组中.
myInfoFeatureVector.push_back(myInfoFeature);
}
myInfoFeatureVectors.push_back(myInfoFeatureVector);
}
// 显示myInfoFeatureVectors的内容
std::cout << "各路排序前myInfoFeatureVectors的内容" << std::endl;
for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
it != myInfoFeatureVectors.end(); it++) {
showMyInfoFeatureVector(*it);
}
// 各路排序
for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
it != myInfoFeatureVectors.end(); it++) {
sort((*it).begin(), (*it).end(), ::lessThan);
}
// 显示myInfoFeatureVectors的内容
std::cout << "各路排序后myInfoFeatureVectors的内容" << std::endl;
for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
it != myInfoFeatureVectors.end(); it++) {
showMyInfoFeatureVector(*it);
}
// 多路归并排序
std::ofstream ofs; // 输出文件
std::string fn = SORTED_DIR; fn += "merge.dat";// 文件名
ofs.open(fn, std::ios::binary | std::ios::out | std::ios::trunc); // 创建文件
if (ofs.bad()) return;
// 计算元素的总数
int nEleTotal = 0;
for (MyInfoFeatureVectors::iterator it = myInfoFeatureVectors.begin();
it != myInfoFeatureVectors.end(); it++) {
nEleTotal += it->size();
}
// 每个MyInfoFeatureVector的下标
std::vector<int> indexes; indexes.resize(myInfoFeatureVectors.size());
// 排序(核心代码)
for (int n = 0; n < nEleTotal; n++) {
unsigned int minKey = -1; // -1对应最大的无符号整数
int iMin = 0; // 最小的key所在的数组的编号
for (int i = 0; i < myInfoFeatureVectors.size(); i++) {
for (int j = indexes[i]; j < myInfoFeatureVectors[i].size(); j++) {
if (myInfoFeatureVectors[i][j].key < minKey) {
minKey = myInfoFeatureVectors[i][j].key;
iMin = i;
}
}
}
// 从myInfoFeatureVectors[iMin][indexes[iMin]]对应的文件中读取数据.
int jTemp = indexes[iMin];
ifsVector[iMin]->seekg(myInfoFeatureVectors[iMin][jTemp].pos, std::ios::beg);
ifsVector[iMin]->read((char*)&myInfo, sizeof(myInfo));
// 将读取的数据写入文件
ofs.write((char*)&myInfo, sizeof(myInfo));
// 改变对应的索引值.
indexes[iMin]++;
}
// 关闭文件并删除文件对象
for (std::vector<std::ifstream*>::iterator it = ifsVector.begin(); it != ifsVector.end(); it++) {
(*it)->close();
delete (*it);
}
ofs.close();
// 读取并显示排序后的文件. 检验排序结果.
fn = SORTED_DIR; fn += "merge.dat";// 文件名
std::ifstream ifs;
ifs.open(fn, std::ios::binary | std::ios::in);
// 获取文件大小
std::streampos fSize = 0;
ifs.seekg(0, std::ios::end);
fSize = ifs.tellg();
std::cout << "已打开文件" << fn << "的大小为: " << fSize << "字节." << std::endl;
// 读取并提取特征数据
for (ifs.seekg(0, std::ios::beg); ifs.tellg() < fSize; ) {
ifs.read((char*)&myInfo, sizeof(myInfo)); // 读取myInfo
showMyInfo(myInfo);
}
ifs.close();
}
int main()
{
// 设置三个信息数组(列表).
setMyInfoVectors();
// 创建目录
_mkdir(DATA_DIR);
_mkdir(SORTED_DIR);
// 将三个信息数组写入不同的文件中. 2022.4.18
writeMyInfoVectors();
// 将创建的文件中的数据读入myInfoFeatureVectors中.
mergeSort();
return 0;
}
为了使用MFC的类CFileFind,需要包含头文件afx.h,在编译时会出现错误:
fatal error C1189: #error: ?Building MFC application with /MD[d] (CRT dll version) requires MFC shared dll version. Please #define _AFXDLL or do not use /MD[d]
?
解决该错误的方法是,在项目的属性页中,将“配置属性-高级-MFC的使用”右侧的选项由“使用标准Windows库”改为“在静态库中使用MFC”或“在共享DLL中使用MFC”,如下图所示。
并将“字符集”改为“使用多字节字符集”。
程序运行结果如下:?
?
?
|