本例使用 selenium 库：先导入该包（我这里已经导入过了），只要导入后不报错就行。
代码如下：
from selenium import webdriver
import time
class Douyu(object):
    """Scrape the room listing from Douyu's "all rooms" directory page.

    A Chrome session is started on construction and kept in ``self.driver``.
    This class never closes it; whoever creates the instance should call
    ``self.driver.quit()`` when finished.
    """

    def __init__(self):
        # Directory page that is loaded by run().
        self.url = 'https://www.douyu.com/directory/all'
        self.driver = webdriver.Chrome()

    def paqu(self, wait=1):
        """Collect one record per room card on the currently loaded page.

        Args:
            wait: Seconds to sleep before reading the DOM, giving the page
                time to render. Defaults to 1.

        Returns:
            A list of dicts with the keys ``'title'``, ``'type'``, ``'主播'``,
            ``'热度'`` and ``'链接'``, in page order.
        """
        time.sleep(wait)
        # find_element(s)('xpath', ...) is used instead of the
        # find_element(s)_by_xpath helpers, which Selenium 4.3 removed.
        cards = self.driver.find_elements(
            'xpath', '//*[@id="listAll"]/section[2]/div[2]/ul/li/div')
        list_duyu = []
        for card in cards:
            title = card.find_element('xpath', './a/div[2]/div[1]/h3').text
            list_duyu.append({
                # Round-trip through GBK to drop characters GBK cannot encode
                # (presumably so print() does not fail on a GBK console).
                'title': title.encode("gbk", "ignore").decode("gbk"),
                'type': card.find_element('xpath', './a/div[2]/div[1]/span').text,
                '主播': card.find_element('xpath', './a/div[2]/div[2]/h2').text,
                '热度': card.find_element('xpath', './a/div[2]/div[2]/span').text,
                '链接': card.find_element('xpath', './a').get_attribute("href"),
            })
        return list_duyu

    def tiqu(self, data):
        """Print every record in ``data``, one per line."""
        for da in data:
            print(da)

    def run(self):
        """Load the directory page and scrape it page by page.

        After each page is scraped and printed, the "next page" control is
        located, the window is scrolled down, and the control is clicked.
        The loop ends as soon as that step raises.
        """
        self.driver.get(self.url)
        while True:
            data = self.paqu()
            self.tiqu(data)
            try:
                el_next = self.driver.find_element(
                    'xpath', '//*[@class=" dy-Pagination-next"]/span')
                print(el_next)
                # Scroll far down so the pagination control is in view
                # before clicking it.
                self.driver.execute_script('window.scrollTo(0,100000)')
                el_next.click()
            except Exception:
                # Best-effort stop: presumably the lookup fails on the last
                # page. Unlike a bare ``except:``, this lets
                # KeyboardInterrupt and SystemExit propagate.
                break
if __name__ == '__main__':
    # Script entry point: scrape every page of the directory listing.
    douyu = Douyu()
    try:
        douyu.run()
    finally:
        # Always shut down the browser session started by Douyu(), even if
        # scraping raises, so no Chrome/chromedriver process is left behind.
        douyu.driver.quit()
|