# Method 1: scrape all city temperatures and save them to a CSV file
"""
需求:
1.爬取全国所有的城市名称以及对应的气温
2.保存所有的城市名称以及对应的气温到为csv文件
目标url:
1.华北地区:http://www.weather.com.cn/textFC/hb.shtml
2.东北地区:http://www.weather.com.cn/textFC/db.shtml
3.华东地区:http://www.weather.com.cn/textFC/hd.shtml
4.华中地区:http://www.weather.com.cn/textFC/hz.shtml
5.华南地区:http://www.weather.com.cn/textFC/hn.shtml
6.西北地区:http://www.weather.com.cn/textFC/xb.shtml
7.西南地区:http://www.weather.com.cn/textFC/xn.shtml
8.港澳台地区:http://www.weather.com.cn/textFC/gat.shtml
规律: 'http://www.weather.com.cn/textFC/' + dq_name + '.shtml' 其中,dq_name = [hb,db,hd,hz,hn,xb,xn,gat]
"""
import requests
def get_source(url, timeout=10):
    """Fetch *url* and return the page body decoded as UTF-8 text.

    Args:
        url: page URL to download.
        timeout: seconds to wait for the server; without it the original
            call could block indefinitely on a stalled connection.

    Returns:
        The page HTML as a str.

    Raises:
        requests.RequestException: on network failure or an HTTP error status.
    """
    response = requests.get(url, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of handing an error page to the parser.
    response.raise_for_status()
    # The site serves UTF-8; set it explicitly so .text decodes correctly.
    response.encoding = 'utf-8'
    return response.text
from bs4 import BeautifulSoup
def get_info(source):
    """Parse one region page and extract (city, high, low) temperature rows.

    Args:
        source: HTML text of a www.weather.com.cn/textFC region page.

    Returns:
        A list of (city, temp_high, temp_low) str tuples; an empty list
        when the expected 'conMidtab' container is missing (error page or
        changed layout), instead of crashing with AttributeError.
    """
    # html5lib tolerates the site's malformed table markup that stricter
    # parsers choke on.
    soup = BeautifulSoup(source, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        return []
    info = []
    for table in conMidtab.find_all('table'):
        # The first two rows of every province table are column headers.
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            # The first data row leads with a province cell, so the city
            # name sits in the second cell there.
            city_td = tds[1] if index == 0 else tds[0]
            city = list(city_td.stripped_strings)[0]
            # Count columns from the end: offsets are stable there even
            # though the leading cells differ between rows.
            temp_high = list(tds[-5].stripped_strings)[0]
            temp_low = list(tds[-2].stripped_strings)[0]
            info.append((city, temp_high, temp_low))
    return info
import csv
def save_weather(info, path='weatherinfo.csv'):
    """Write (city, temp_high, temp_low) rows to a CSV file with a header.

    Args:
        info: iterable of (city, temp_high, temp_low) tuples, already in
            header-column order.
        path: output file; defaults to the original hard-coded name so
            existing callers are unaffected.
    """
    fieldnames = ['city', 'temp_high', 'temp_low']
    # newline='' stops the csv module doubling line endings on Windows.
    with open(path, 'w', encoding='UTF-8', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(fieldnames)  # header row
        # Tuples already match the header order, so no dict round-trip needed.
        writer.writerows(info)
def main():
    """Scrape every region page and save all rows to weatherinfo.csv."""
    regions = ['hb', 'db', 'hd', 'hz', 'hn', 'xb', 'xn', 'gat']
    all_rows = []
    for region in regions:
        page = get_source('http://www.weather.com.cn/textFC/' + region + '.shtml')
        all_rows.extend(get_info(page))
    save_weather(all_rows)
if __name__ == '__main__':
    main()
# Method 2: print city temperatures for selected region pages
import requests
from bs4 import BeautifulSoup
def parse_page(url, timeout=10):
    """Download one region page and print each city with its temperature.

    Args:
        url: www.weather.com.cn/textFC region page URL.
        timeout: seconds before the request is abandoned; the original
            call had none and could block forever.
    """
    response = requests.get(url, timeout=timeout)
    text = response.content.decode('utf-8')
    # html5lib handles the site's malformed table markup.
    soup = BeautifulSoup(text, 'html5lib')
    conMidtab = soup.find('div', class_='conMidtab')
    if conMidtab is None:
        # Error page or changed layout: nothing to print, avoid AttributeError.
        return
    for table in conMidtab.find_all('table'):
        # The first two rows of every province table are column headers.
        for index, tr in enumerate(table.find_all('tr')[2:]):
            tds = tr.find_all('td')
            # First data row leads with the province cell; city is the 2nd td.
            city_td = tds[1] if index == 0 else tds[0]
            city = list(city_td.stripped_strings)[0]
            # Second-to-last column holds the temperature value printed here.
            temp = list(tds[-2].stripped_strings)[0]
            print('城市:', city, '温度:', temp)
def main():
    """Print city/temperature listings for a sample of region pages."""
    # The original had two single-url assignments here that were dead code
    # (immediately shadowed by the list below); they have been removed.
    urls = [
        'http://www.weather.com.cn/textFC/hb.shtml',
        'http://www.weather.com.cn/textFC/db.shtml',
        'http://www.weather.com.cn/textFC/gat.shtml',
    ]
    for url in urls:
        parse_page(url)
if __name__ == '__main__':
    main()