?
import csv
import time

from selenium import webdriver


def csv_writer(item):
    """Append one row to ``weibo.csv`` in the current working directory.

    The file is reopened in append mode on every call and encoded as GBK.
    Characters that GBK cannot represent are replaced with ``?`` rather
    than causing the whole row to be dropped.

    Args:
        item: Iterable of field values that make up a single CSV row.
    """
    with open('weibo.csv', 'a', encoding='gbk', errors='replace',
              newline='') as csvfile:
        writer = csv.writer(csvfile)
        try:
            writer.writerow(item)
        except (csv.Error, UnicodeError, OSError) as exc:
            # Best effort: report the failed row and keep the scraper running,
            # but let KeyboardInterrupt / SystemExit propagate.
            print('写入失败', exc)
def login():
    """Open the Weibo home page and click the sidebar login button.

    Uses the module-level ``driver``. The function sleeps 20 seconds after
    loading the page and another 20 seconds after clicking the button.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the login
            button is not found at the expected XPath.
    """
    # Imported locally so this function does not depend on edits elsewhere
    # in the file.
    from selenium.webdriver.common.by import By

    driver.get('https://weibo.com/')
    time.sleep(20)
    driver.set_window_size(1920, 1080)
    print('准备登陆')
    # find_element(By.XPATH, ...) works on Selenium 3 and 4; the
    # find_element_by_xpath shortcut was removed in Selenium 4.3.
    login_button = driver.find_element(
        By.XPATH, '//*[@id="__sidebar"]/div/div[1]/div[1]/div/button')
    login_button.click()
    # NOTE(review): presumably this pause leaves time to finish the login
    # by hand (e.g. scanning a QR code) -- confirm.
    time.sleep(20)


def xxx():
    """Scroll the current page down and wait 30 seconds.

    Uses the module-level ``driver``.
    """
    # Move the scroll bar to the bottom of the page.
    js = "var q=document.documentElement.scrollTop=100000"
    driver.execute_script(js)
    time.sleep(30)
def spider():
    """Scrape the posts currently loaded on the Weibo home feed.

    Reloads the home page, scrolls down via ``xxx()``, then for every post
    found under the ``scroller`` container extracts the publisher name, the
    link in the header's second row and the post text, and appends them as
    one row through ``csv_writer``. Uses the module-level ``driver``.

    Posts that lack any of the three fields are skipped.
    """
    # Imported locally so this function does not depend on edits elsewhere
    # in the file.
    from selenium.webdriver.common.by import By

    driver.get('https://weibo.com/')
    xxx()
    time.sleep(4)

    # One element per post. The original selected the single container
    # '//*[@id="scroller"]/div[1]', so the loop ran only once.
    all_weibo = driver.find_elements(
        By.XPATH, '//*[@id="scroller"]/div[1]/div')
    for weibo in all_weibo:
        # Relative XPaths (leading './') are evaluated against this post.
        # A leading '//' searches the whole document and would return the
        # first post's fields on every iteration.
        id_links = weibo.find_elements(
            By.XPATH, './div/article/div/header/div[1]/div/div[1]/a')
        url_links = weibo.find_elements(
            By.XPATH, './div/article/div/header/div[1]/div/div[2]/a')
        content_nodes = weibo.find_elements(
            By.XPATH, './div/article/div/div/div[1]')
        if not (id_links and url_links and content_nodes):
            # Not shaped like a regular post; skip instead of raising
            # IndexError on [0].
            continue

        pub_id = id_links[0].text
        pub_id_url = url_links[0].get_attribute('href')
        pub_content = content_nodes[0].text
        item = [pub_id, pub_id_url, pub_content]
        print('抓取成功', pub_id)
        csv_writer(item)


if __name__ == '__main__':
    # Raw string: the original literal relied on invalid escape sequences
    # ('\P', '\M', ...) being passed through unchanged; the value is the same.
    # NOTE(review): recent Selenium 4 releases expect the driver path via a
    # Service object rather than a positional argument -- confirm against the
    # installed Selenium version.
    driver = webdriver.Edge(
        r'C:\Program Files (x86)\Microsoft\Edge\Application\msedgedriver.exe')
    try:
        login()
        while True:
            spider()
            time.sleep(60)
    finally:
        # Close the browser and driver process even when the loop is
        # interrupted or a scrape raises.
        driver.quit()
|