前言
本人使用C++语言,和GDAL库来实现K-means算法
一、代码解释
1、K-means算法解释
K-means聚类方法,又称K均值聚类,主要是将需要聚类的对象分为K类(K个簇)。
2、算法原理
首先,选出K个聚类中心(亦称为K个簇中心),然后将每一个待分类的对象分到离其最近的聚类中心所在的类中,然后重新更新每一个聚类(簇)的中心,反复迭代计算,直至满足没有(或最小数目)对象被重新分配给不同的聚类,没有(或最小数目)聚类中心再发生变化,或者达到最大的迭代次数为止。
3、算法具体解释
1、在K-means算法中,其最初聚类中心的选择会影响迭代次数以及最后的聚类效果,本次实验选取初始聚类中心的方法如下: 首先随机在聚类对象选出一个初始聚类中心,记为A,再在剩余的聚类对象中选出离A最远的对象为第二个聚类中心,记为B,然后在在剩余的聚类对象中,选出与已有的聚类中心(A,B)中的最小距离最大者,作为新的聚类中心,依次类推,选出与已有的中心最小距离最大者,作为新的聚类中心,直至聚类中心个数满足要求为止。 2、迭代停止条件,由于本次实验算法为有迭代思想,需定义实验终止条件。本次实验终止条件为:没有聚类中心再发生变化。 3、代码提示: #创建 k 个点作为起始质心(1、中的方法创建) #while(不满足跳出迭代的条件) { for(对每一个对象) { #找出离其最近的聚类中心,并更新对象的所属属性,与之前的所属属性对比 } #更新聚类中心(簇中心),并算出与前一次相比的中心变化 } #对象的输出,并显示
二、算法实现
代码如下:
#include <stdio.h>
#include "gdal_priv.h"
#include "ogrsf_frmts.h"
#include <math.h>
#include <algorithm>
#include<iostream>
#include<vector>
using namespace std;
struct Centor
{
double x;
double y;
};
double getDistance(double x1, double y1, double x2, double y2)
{
return ((x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2));
}
static int random()
{
srand((unsigned)time(NULL));
int n, m;
n = 799;
m = 0;
int num = rand() % (n - m + 1) + m;
return num;
}
OGRLayer* Open(string strFilePath)
{
GDALAllRegister();
CPLSetConfigOption("GDAL_FILENAME_IS_UTF8", "NO");
GDALDataset* poSrcShp;
poSrcShp = (GDALDataset *)GDALOpenEx(strFilePath.c_str(), GA_Update, NULL, NULL, NULL);
if (poSrcShp == NULL)
{
std::cout << "数据打开失败!" << std::endl;
return NULL;
}
OGRLayer *poLayer;
poLayer = poSrcShp->GetLayer(0);
return poLayer;
}
void getPoints(string strFilePath, vector<int>& point)
{
OGRLayer* poLayer = Open(strFilePath);
OGRFeature *poFeature = poLayer->GetFeature(0);
poLayer->ResetReading();
cout << "请输入选择簇心点数" << endl;
int num;
cin >> num;
int q = random();
point.push_back(q);
double distance = 0;
int num2 = point.size();
double maxDistance = 0;
int Fid = 0;
int nFeatureCount = poLayer->GetFeatureCount();
for (int n=1 ; n < num; n++)
{
maxDistance = -1;
for (int i = 0; i < nFeatureCount; i++)
{
double dMin = 9999999999999999999;
OGRFeature* poOtherPoint = poLayer->GetFeature(i);
double x2 = poOtherPoint->GetFieldAsDouble(1);
double y2 = poOtherPoint->GetFieldAsDouble(2);
for (int m =0;m< num2;m++)
{
OGRFeature * poTempPoint = poLayer->GetFeature(point[m]);
double x3 = poTempPoint->GetFieldAsDouble(1);
double y3 = poTempPoint->GetFieldAsDouble(2);
distance = getDistance(x2, y2, x3, y3);
if (distance<dMin)
{
dMin = distance;
}
}
if (dMin > maxDistance)
{
Fid = i;
maxDistance = dMin;
}
}
point.push_back(Fid);
num2++;
}
}
void reset(OGRLayer*poLayer, vector<Centor>¢orList, vector<int>&point )
{
int nFeatureCount = poLayer->GetFeatureCount();
OGRFeature* poFeature = NULL;
Centor resetCentor;
for (int n = 0; n < point.size(); n++)
{
double sumX = 0, sumY = 0;
int num = 0;
for (int m = 0; m < nFeatureCount; m++)
{
poFeature = poLayer->GetFeature(m);
int from = poFeature->GetFieldAsInteger("from");
if (from == point[n])
{
sumX += poFeature->GetFieldAsDouble(1);
sumY += poFeature->GetFieldAsDouble(2);
num++;
}
}
sumX = sumX / double(num);
sumY = sumY / double(num);
resetCentor.x = sumX;
resetCentor.y = sumY;
centorList[n] = resetCentor;
}
}
bool equal(vector<Centor>¢orList,vector<Centor>¢orList2)
{
int n = centorList.size();
if (centorList.empty() || centorList2.empty())
{
return 0;
}
for (int i = 0; i < n; i++)
{
if (centorList[i].x != centorList2[i].x || centorList[i].y != centorList2[i].y)
{
return 0;
}
}
return 1;
}
void createShp(string strFilePath,string outPutPath)
{
CPLSetConfigOption("GDAL_FILENAME_IS_UTF8", "NO");
const char *pszDriverName = "ESRI Shapefile";
GDALDriver *poDriver = OGRSFDriverRegistrar::GetRegistrar()->GetDriverByName(pszDriverName);
const char* datasetPath = strFilePath.c_str();
GDALDataset*poSrcData = (GDALDataset *)GDALOpenEx(datasetPath,GA_Update, NULL, NULL, NULL);
GDALDataset *poDstData = poDriver->CreateCopy(outPutPath.c_str(),poSrcData, 0, 0, 0, NULL);
OGRLayer*poLayer = poDstData->GetLayer(0);
OGRFieldDefn ofrom("from", OFTInteger);
ofrom.SetWidth(6);
poLayer->CreateField(&ofrom);
GDALClose(poDstData);
}
void kMeans(string strFilePath,vector<int>&point)
{
cout << "请输入输出shp文件路径" << endl;
cout << "最后在shp文件尾加上簇心点数" << endl;
string outPutPath;
cin >> outPutPath;
createShp(strFilePath,outPutPath);
int n = point.size();
Centor resetCentor;
vector<Centor>centorList;
vector<Centor>currentCentorList;
OGRLayer*poLayer = Open(outPutPath);
for (int i = 0; i < n; i++)
{
resetCentor.x = poLayer->GetFeature(point[i])->GetFieldAsDouble(1);
resetCentor.y = poLayer->GetFeature(point[i])->GetFieldAsDouble(2);
centorList.push_back(resetCentor);
}
int nFeatureCount = poLayer->GetFeatureCount();
int Fid = 0;
while (!equal(centorList,currentCentorList))
{
for (int i = 0; i < nFeatureCount; i++)
{
double d_Min = 9999999999999999999;
OGRFeature *poFeature = poLayer->GetFeature(i);
for (int j = 0; j < n; j++)
{
double x1 = poFeature->GetFieldAsDouble(1);
double y1 = poFeature->GetFieldAsDouble(2);
double x2 = centorList[j].x;
double y2 = centorList[j].y;
double ddistance = getDistance(x1, y1, x2, y2);
if (d_Min > ddistance)
{
d_Min = ddistance;
Fid = point[j];
}
}
poFeature->SetField("from", Fid);
poLayer->SetFeature(poFeature);
}
currentCentorList = centorList;
reset(poLayer, centorList, point);
}
}
void main()
{
cout << "请输入原始shp文件路径" << endl;
string strFilePath;
cin >> strFilePath;
vector<int>point;
getPoints(strFilePath,point);
kMeans(strFilePath,point);
}
总结
本代码需要输入初始shp文件、选择簇心点个数和结果shp文件地址。操作如图: 用arcmap读取数据并符号化表示: 此为三个簇心选择结果
5个簇心选择结果 8个簇心选择结果
|