操作思路及流程:
数据获取
需要的Package: requests
r = requests.get('http://map.amap.com/service/subway?_1469083453978&srhdata=1100_drw_beijing.json')
r.encoding = r.apparent_encoding
print(r.text)
部分数据如下:
通过返回的信息[一系列的字典类型]我们可以找到一些貌似是我们想要的键值对数据[站名 、经纬度坐标 等]
数据处理及提取
需要的Package: re 、numpy 、networkx
提取前我们需要想一下我们想要获得什么数据?
- 所有线路信息
- 字典存储dict:key:线路名称[字符串];value:站点名称[列表]
- 所有站点信息
- 字典存储dict:key:站点名称[字符串];value:站点坐标[元组]
def get_lines_stations_info(text):
lines_info = {}
stations_info = {}
pattern = re.compile('"st".*?"kn"')
lines_list = pattern.findall(text)
for i in range(len(lines_list)):
pattern = re.compile('"ln":".*?"')
line_name = pattern.findall(lines_list[i])[0][6:-1]
pattern = re.compile('"rs".*?"sp"')
temp_list = pattern.findall(lines_list[i])
station_name_list = []
for j in range(len(temp_list)):
pattern = re.compile('"n":".*?"')
station_name = pattern.findall(temp_list[j])[0][5:-1]
station_name_list.append(station_name)
pattern = re.compile('"sl":".*?"')
position = tuple(map(float, pattern.findall(temp_list[j])[0][6:-1].split(',')))
stations_info[station_name] = position
lines_info[line_name] = station_name_list
return lines_info, stations_infos
每写完一个函数,对其进行以下测试:
lines_info, stations_info = get_lines_stations_info(r.text)
print(lines_info)
print(stations_info)
数据可视化
import networkx as nx
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(figsize=(20, 20))
stations_graph = nx.Graph()
stations_graph.add_nodes_from(list(stations_info.keys()))
nx.draw(stations_graph, stations_info, with_labels=True, node_size=6)
plt.show()
图的创建
图一般有两种存储格式:邻接矩阵 和邻接表
此次采用邻接表建图:
def add_sta2tosta1(info,sta1,sta2):
list_tmp = info.get(sta1)
if not list_tmp:
list_tmp = []
list_tmp.append(sta2)
info[sta1] = list_tmp
return info
def create_map(lines_info):
neighbor_info = {}
for line_name,station_list in lines_info.items():
for i in range(len(station_list) - 1):
sta_one = station_list[i]
sta_two = station_list[i+1]
neighbor_info = add_sta2tosta1(neighbor_info,sta_one,sta_two)
neighbor_info = add_sta2tosta1(neighbor_info,sta_two,sta_one)
return neighbor_info
对函数进行测试:
nb_info = create_map(lines_info)
print(nb_info)
有了邻接表我们就可以对地图进行搜索遍历,在这之前,我们在对原来的图根据邻接表信息添加上折线
stations_connection_graph = nx.Graph(nb_info)
plt.figure(figsize=(30, 30))
nx.draw(stations_connection_graph, stations_info, with_labels=True, node_size=6)
plt.show()
谢谢 有被图丑 到^ ^
搜索算法设计
方法1:非启发式搜索 DFS、BFS
如果我们想找到一条最短路径,可以用DFS 吗?答案是不可以。
- 当求解目的是必须找到最优解时,要使用
BFS 。 - 当求解目标是只要找到解即可,而且内存大小受限,则要使用
DFS 。
def BFS(lines_info,neighbor_info,start_station,end_station):
if (not neighbor_info.get(start_station)) or (not neighbor_info.get(end_station)):
return None
paths = {}
paths[start_station] = [start_station]
while True:
paths_tmp = {}
for (k,val) in paths.items():
neighbors = neighbor_info[k].copy()
if (len(val) >= 2):
pre_station = val[-2]
neighbors.remove(pre_station)
for station in neighbors:
if station in paths:
continue
path = val.copy()
path.append(station)
paths_tmp[station] = path
if station == end_station:
return path
paths = paths_tmp
测试:
print(BFS(lines_info,nb_info,'朝阳门','望京西'))
[‘朝阳门’, ‘东四十条’, ‘东直门’, ‘柳芳’, ‘光熙门’, ‘芍药居’, ‘望京西’]
方法2:启发式搜索 路程最短优先
def get_path_Astar(lines_info, neighbor_info, stations_info, from_station, to_station):
if not neighbor_info.get(from_station):
print('起始站点“%s”不存在。请正确输入!'%from_station)
return None
if not neighbor_info.get(to_station):
print('目的站点“%s”不存在。请正确输入!'%to_station)
return None
distances = {}
x,y = stations_info.get(to_station)
for (k,v) in stations_info.items():
x0,y0 = stations_info.get(k)
l = ((x-x0)**2 + (y-y0)**2)**0.5
distances[k] = l
searched = {}
searched[from_station] = 0
nodes = pd.DataFrame([[[from_station], 0, 0, distances.get(from_station)]],
index=[from_station], columns=['path', 'cost', 'g', 'h'])
print(nodes)
count = 0
while True:
if count > 1000:
break
nodes.sort_values('cost', inplace=True)
for index, node in nodes.iterrows():
count += 1
neighbors = neighbor_info.get(index).copy()
if len(node['path']) >= 2:
neighbors.remove(node['path'][-2])
for i in range(len(neighbors)):
count += 1
neighbor = neighbors[i]
g = node['g'] + get_distance(stations_info, index, neighbor)
h = distances[neighbor]
cost = g + h
path = node['path'].copy()
path.append(neighbor)
if neighbor == to_station:
print('共检索%d次。'%count)
return path
if neighbor in searched:
if g >= searched[neighbor]:
continue
else:
searched[neighbor] = g
nodes.drop(neighbor, axis=0, inplace=True)
row = pd.DataFrame([[path, cost, g, h]],index=[neighbor], columns=['path', 'cost', 'g', 'h'])
nodes = nodes.append(row)
else:
searched[neighbor] = g
row = pd.DataFrame([[path, cost, g, h]],index=[neighbor], columns=['path', 'cost', 'g', 'h'])
nodes = nodes.append(row)
nodes.drop(index, axis=0, inplace=True)
continue
print('未能找到路径')
return None
def get_distance(stations_info, str1, str2):
x1,y1 = stations_info.get(str1)
x2,y2 = stations_info.get(str2)
return ((x1-x2)**2 + (y1-y2)**2)** 0.5
测试:
paths = get_path_Astar(lines_info, nb_info, stations_info, '朝阳门', '望京西')
if paths:
print("路径总计%d站。"%(len(paths)-1))
print("-".join(paths))
共检索122次。 路径总计6站。 朝阳门-东四十条-东直门-柳芳-光熙门-芍药居-望京西
主函数代码:
if __name__ == '__main__':
r = requests.get('http://map.amap.com/service/subway?_1469083453978&srhdata=1100_drw_beijing.json')
r.encoding = r.apparent_encoding
lines_info, stations_info = get_lines_stations_info(r.text)
matplotlib.rcParams['font.sans-serif'] = ['SimHei']
plt.figure(figsize=(20, 20))
stations_graph = nx.Graph()
stations_graph.add_nodes_from(list(stations_info.keys()))
nx.draw(stations_graph, stations_info, with_labels=True, node_size=6)
nb_info = create_map(lines_info)
stations_connection_graph = nx.Graph(nb_info)
plt.figure(figsize=(30, 30))
nx.draw(stations_connection_graph, stations_info, with_labels=True, node_size=6)
print(BFS(lines_info,nb_info,'朝阳门','望京西'))
paths = get_path_Astar(lines_info, nb_info, stations_info, '朝阳门', '望京西')
if paths:
print("路径总计%d站。"%(len(paths)-1))
print("-".join(paths))
|