import time

import pandas as pd
from numpy import *
from selenium import webdriver
from selenium.webdriver.common.by import By
# Scrape the 58.com house-price pages for every (district id, year) pair,
# average the prices listed on each page, and export the table to "1.xlsx".

# Raw string: the Windows path contains backslashes that must not be
# interpreted as escape sequences.
# NOTE(review): `executable_path` is deprecated in Selenium 4 in favour of a
# Service object -- confirm which Selenium version this script targets.
driver = webdriver.Chrome(
    executable_path=r'C:\Program Files (x86)\Google\Chrome\Application\chromedriver'
)

content_list = []

# Ids substituted into the `qy-<id>` segment of the URL.
diqu = [
    1025, 1028, 1033, 1026, 1034, 1031, 1030, 1032,
    1024, 1023, 1037, 1027, 1035, 1029, 1038, 1036,
]

# Years substituted into the `shi-<year>-100` segment of the URL.
list_year = [
    2021, 2020, 2019, 2018, 2017, 2016, 2015,
    2014, 2013, 2012, 2011, 2010, 2009, 2008,
]

# Locator of the <span> elements whose text is parsed as a price.
price_xpath = "//*[@id='main']/div/div[4]/div[3]/div/div/div[1]/div[2]/div[2]/ul/li/a/span"

try:
    for di in diqu:
        for year in list_year:
            driver.get("https://www.58.com/fangjiawang/shi-{}-100/qy-{}/".format(year, di))
            # Fixed pause after navigation, presumably to let the page finish
            # rendering before it is queried.
            time.sleep(3)

            span = driver.find_elements(By.XPATH, price_xpath)
            # The last three characters of each text are dropped before the
            # integer conversion (presumably a unit suffix -- verify on the page).
            qian = [int(x.text[0:-3]) for x in span]

            # A page with no matching elements would otherwise divide by zero;
            # record NaN for it so the row is still present in the output.
            avg = sum(qian) / len(qian) if qian else float("nan")

            item = {"di": di, "year": year, "avg": avg}
            print(item)
            content_list.append(item)
finally:
    # Always shut the browser down, even when a page fails to load or parse.
    driver.quit()

# One row per (district id, year) pair, in scrape order.
data = pd.DataFrame({
    '地区': [row["di"] for row in content_list],
    '年份': [row["year"] for row in content_list],
    '平均房价': [row["avg"] for row in content_list],
})
data.to_excel("1.xlsx")