知识点
- 爬虫基本流程
- json
- requests:在爬虫中用于发送网络请求
- pandas 表格处理 / 保存数据
- pyecharts 可视化
开发环境
- Python 3.8:比较稳定的版本;解释器发行版使用 Anaconda,在 Jupyter Notebook 里编写数据分析代码更专业
- PyCharm:专业的代码编辑器,其版本按照年份与月份划分
爬虫完整代码
导入模块
import requests
import json
import pprint
import pandas as pd
分析网站
先找到今天要爬取的目标数据
https://news.qq.com/zt2020/page/feiyan.htm
data:image/s3,"s3://crabby-images/0b604/0b6041e6c0fad67e89a0111f2148667a39d20a8c" alt="" 找到数据所在url data:image/s3,"s3://crabby-images/0f640/0f6409f598165f42dc1e2f475b9a9370dff26cf5" alt="" data:image/s3,"s3://crabby-images/4595a/4595aedc51c074b14df91b25f53b411224cea4fa" alt=""
发送请求
# JSON endpoint that serves the statistics for the page above.
# The trailing `_` query parameter looks like a millisecond timestamp used as
# a cache-buster (assumption -- not confirmed from this file).
url = 'https://view.inews.qq.com/g2/getOnsInfo?name=disease_h5&_=1638361138568'
# NOTE(review): verify=False disables TLS certificate verification, which
# exposes the request to man-in-the-middle tampering. It is kept here because
# the original relied on it; drop it once the certificate chain is confirmed
# to validate in your environment.
# requests applies no timeout by default, so without one a stalled server
# would block this call forever.
response = requests.get(url, verify=False, timeout=10)
# Fail fast with requests.HTTPError on a 4xx/5xx reply instead of trying to
# parse an error page as JSON further down.
response.raise_for_status()
获取数据
# Decode the response body as JSON and keep only its 'data' field.
# That value is handed to json.loads() in the parsing step below, so it is
# expected to be a JSON document serialized as a string, not a parsed object.
json_data = response.json()['data']
解析数据
# The 'data' field is itself JSON text; decode it into Python objects.
json_data = json.loads(json_data)
# Take the first entry of 'areaTree' and descend into its 'children'.
# Presumably that first entry is the China node and each child is one
# province-level region -- verify against the live payload.
china_data = json_data['areaTree'][0]['children']

# Flatten each region into one row. The key order below fixes the column
# order of the DataFrame built from data_set later on.
data_set = []
for i in china_data:
    data_dict = {
        'province': i['name'],
        'nowConfirm': i['total']['nowConfirm'],
        'dead': i['total']['dead'],
        'heal': i['total']['heal'],
        'deadRate': i['total']['deadRate'],
        'healRate': i['total']['healRate'],
    }
    data_set.append(data_dict)
保存数据
# Build a table from the list of per-region dicts: one row per dict, one
# column per key.
df = pd.DataFrame(data=data_set)
# Persist the table as data.csv in the current working directory, using the
# pandas defaults (the row index is written as the first column).
df.to_csv(path_or_buf='data.csv')
data:image/s3,"s3://crabby-images/cba4a/cba4a23b0d4c12b8466bbb1a14837909c1c7d6ec" alt=""
数据可视化
导入模块
from pyecharts import options as opts
from pyecharts.charts import Bar,Line,Pie,Map,Grid
读取数据
# Order the rows by current confirmed cases, largest first, and keep the
# first nine.
df2 = df.sort_values(by='nowConfirm', ascending=False).head(9)
# Bare expression: as the last statement of a notebook cell it displays the
# table.
df2
data:image/s3,"s3://crabby-images/6c384/6c384cfd6cca8af79663dc8e17742a40b02e1c4e" alt=""
死亡率与治愈率
# Line chart with one point per row of df: the x axis carries the province
# names, and the two series plot the 'healRate' and 'deadRate' columns.
# Each pyecharts call returns the chart, so the result is rebound step by step
# instead of being written as one parenthesised chain.
line = Line()
line = line.add_xaxis(list(df['province'].values))
line = line.add_yaxis("治愈率", df['healRate'].values.tolist())
line = line.add_yaxis("死亡率", df['deadRate'].values.tolist())
line = line.set_global_opts(
    title_opts=opts.TitleOpts(title="死亡率与治愈率"),
)
# Render the chart inline in the Jupyter notebook output.
line.render_notebook()
data:image/s3,"s3://crabby-images/30d6c/30d6c40c4daa9e16af58344356b7174541c295d6" alt=""
各地区确诊人数与死亡人数情况
# Bar chart over the first six rows of df, taken in df's own row order (not
# the sorted df2), with a data-zoom slider attached.
# NOTE(review): the title mentions confirmed counts, but the two series
# plotted are the 'dead' and 'heal' columns -- confirm which was intended.
bar = Bar()
bar = bar.add_xaxis(list(df['province'].values)[:6])
bar = bar.add_yaxis("死亡", df['dead'].values.tolist()[:6])
bar = bar.add_yaxis("治愈", df['heal'].values.tolist()[:6])
bar = bar.set_global_opts(
    title_opts=opts.TitleOpts(title="各地区确诊人数与死亡人数情况"),
    datazoom_opts=[opts.DataZoomOpts()],
)
# Render the chart inline in the Jupyter notebook output.
bar.render_notebook()
data:image/s3,"s3://crabby-images/0e827/0e82735ec30aa28f410d67b9126a0f8ff851e108" alt=""
|