前言:利用Python进行数据处理,并通过第三方包的支持绘画出我国主要城市的分布图,并利用BFS广搜优先搜索计算出任意两个城市间的最短路线
相关Package
import re
import math
import networkx as nx
import matplotlib
import matplotlib.pyplot as plt
from collections import defaultdict
数据准备
以下是一些城市的经纬度信息:
coordination_source = """
{name:'兰州', geoCoord:[103.73, 36.03]},
{name:'嘉峪关', geoCoord:[98.17, 39.47]},
{name:'西宁', geoCoord:[101.74, 36.56]},
{name:'成都', geoCoord:[104.06, 30.67]},
{name:'石家庄', geoCoord:[114.48, 38.03]},
{name:'拉萨', geoCoord:[102.73, 25.04]},
{name:'贵阳', geoCoord:[106.71, 26.57]},
{name:'武汉', geoCoord:[114.31, 30.52]},
{name:'郑州', geoCoord:[113.65, 34.76]},
{name:'济南', geoCoord:[117, 36.65]},
{name:'南京', geoCoord:[118.78, 32.04]},
{name:'合肥', geoCoord:[117.27, 31.86]},
{name:'杭州', geoCoord:[120.19, 30.26]},
{name:'南昌', geoCoord:[115.89, 28.68]},
{name:'福州', geoCoord:[119.3, 26.08]},
{name:'广州', geoCoord:[113.23, 23.16]},
{name:'长沙', geoCoord:[113, 28.21]},
//{name:'海口', geoCoord:[110.35, 20.02]},
{name:'沈阳', geoCoord:[123.38, 41.8]},
{name:'长春', geoCoord:[125.35, 43.88]},
{name:'哈尔滨', geoCoord:[126.63, 45.75]},
{name:'太原', geoCoord:[112.53, 37.87]},
{name:'西安', geoCoord:[108.95, 34.27]},
//{name:'台湾', geoCoord:[121.30, 25.03]},
{name:'北京', geoCoord:[116.46, 39.92]},
{name:'上海', geoCoord:[121.48, 31.22]},
{name:'重庆', geoCoord:[106.54, 29.59]},
{name:'天津', geoCoord:[117.2, 39.13]},
{name:'呼和浩特', geoCoord:[111.65, 40.82]},
{name:'南宁', geoCoord:[108.33, 22.84]},
//{name:'西藏', geoCoord:[91.11, 29.97]},
{name:'银川', geoCoord:[106.27, 38.47]},
{name:'乌鲁木齐', geoCoord:[87.68, 43.77]},
{name:'香港', geoCoord:[114.17, 22.28]},
{name:'澳门', geoCoord:[113.54, 22.19]}
"""
数据处理
一般初始数据都不是容易进行处理的,并且有许多我们不需要的信息,需要对主要信息进行提取
def get_city_info(city_coordination):
city_location = {}
for line in city_coordination.split("\n"):
if line.startswith("//"): continue
if line.strip() == "":continue
city = re.findall("name:'(\w+)'",line)[0]
x_y = re.findall("Coord:\[(\d+.\d+),\s(\d+.\d+)\]",line)[0]
x_y = tuple(map(float,x_y))
city_location[city] = x_y
return city_location
数据处理完毕后打印写结果:
city_info = get_city_info(coordination_source)
print(city_info)
{'兰州': (103.73, 36.03),
'嘉峪关': (98.17, 39.47),
'西宁': (101.74, 36.56),
'成都': (104.06, 30.67),
'石家庄': (114.48, 38.03),
'拉萨': (102.73, 25.04),
'贵阳': (106.71, 26.57),
'武汉': (114.31, 30.52),
'郑州': (113.65, 34.76),
'济南': (117.0, 36.65),
'南京': (118.78, 32.04),
'合肥': (117.27, 31.86),
'杭州': (120.19, 30.26),
'南昌': (115.89, 28.68),
'福州': (119.3, 26.08),
'广州': (113.23, 23.16),
'长沙': (113.0, 28.21),
'沈阳': (123.38, 41.8),
'长春': (125.35, 43.88),
'哈尔滨': (126.63, 45.75),
'太原': (112.53, 37.87),
'西安': (108.95, 34.27),
'北京': (116.46, 39.92),
'上海': (121.48, 31.22),
'重庆': (106.54, 29.59),
'天津': (117.2, 39.13),
'呼和浩特': (111.65, 40.82),
'南宁': (108.33, 22.84),
'银川': (106.27, 38.47),
'乌鲁木齐': (87.68, 43.77),
'香港': (114.17, 22.28),
'澳门': (113.54, 22.19)}
计算两个城市间的距离
def geo_distance(origin, destination):
"""
Calculate the Haversine distance.
Examples
--------
>>> origin = (48.1372, 11.5756) # Munich
>>> destination = (52.5186, 13.4083) # Berlin
>>> round(distance(origin, destination), 1)
504.2
"""
lat1, lon1 = origin
lat2, lon2 = destination
radius = 6371
dlat = math.radians(lat2 - lat1)
dlon = math.radians(lon2 - lon1)
a = (math.sin(dlat / 2) * math.sin(dlat / 2) +
math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) *
math.sin(dlon / 2) * math.sin(dlon / 2))
c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
d = radius * c
return d
def get_city_distance(city1,city2):
return geo_distance(city_info[city1],city_info[city2])
因为需要直到每个城市与周围城市的相连情况,需要根据情况进行连接
def build_connection(city_info):
cities_connection = defaultdict(list)
cities = list(city_info.keys())
for c1 in cities:
for c2 in cities:
if c1 == c2 : continue
if get_city_distance(c1,c2) < threshold:
cities_connection[c1].append(c2)
return cities_connection
结果测试:
cities_connection = build_connection(city_info)
#绘制连接线
cities_connection_graph = nx.Graph(cities_connection)
nx.draw(cities_connection_graph,city_info,with_labels=True,node_size=10)
def sort_by_distance(pathes):
def get_distance_of_path(path):
distance = 0
for i,_ in enumerate(path[:-1]):
distance += get_city_distance(path[i],path[i+1])
return distance
return sorted(pathes,key=get_distance_of_path)
def BFS(graph, start, destination, search_strategy):
pathes = [[start]]
while pathes:
path = pathes.pop(0)
froniter = path[-1]
successsors = graph[froniter]
for city in successsors:
if city in path: continue
new_path = path + [city]
pathes.append(new_path)
pathes = search_strategy(pathes)
if pathes and (destination == pathes[0][-1]):
return pathes[0]
主函数代码如下:
if __name__ == '__main__':
city_info = get_city_info(coordination_source)
city_graph = nx.Graph()
city_graph.add_nodes_from(list(city_info.keys()))
matplotlib.rcParams['font.sans-serif'] = ['KaiTi']
nx.draw(city_graph, city_info, with_labels=True, node_size=10)
cities_connection = build_connection(city_info)
cities_connection_graph = nx.Graph(cities_connection)
nx.draw(cities_connection_graph, city_info, with_labels=True, node_size=8)
print(BFS(cities_connection, "北京", "上海", search_strategy=sort_by_distance))
print(BFS(cities_connection, "郑州", "上海", search_strategy=sort_by_distance))
输出如下:
['北京', '天津', '上海']
['郑州', '南京', '上海']
|