import time
import re
import threading
import queue
from selenium import webdriver
# URL template; the single placeholder receives the page number.
baseurl = 'http://www....{}...html'

# One URL per page, pages 1 through 12 inclusive.
url_list = list(map(baseurl.format, range(1, 13)))

# Shared work queue, pre-loaded with every page URL in order.
q = queue.Queue()
for page_url in url_list:
    q.put(page_url)
class dragen(object):
    """Multi-threaded scraper that drains a queue of URLs using Chrome.

    Each worker thread takes URLs from a shared queue, loads the page in a
    fresh Chrome session, and prints every ``"url":"...",`` value found in
    the page source. A URL whose page yields no matches is put back on the
    queue so it is tried again.
    """

    # Seconds to sleep after navigation before reading the page source
    # (presumably to let the page finish rendering -- confirm the needed wait).
    PAGE_LOAD_WAIT = 10

    # Captures the value of every `"url":"...",` pair in the page source.
    URL_PATTERN = re.compile('"url":"(.*?)",')

    def __init__(self):
        # Number of worker threads started by many_t().
        self.max_thread = 12

    def getdata(self, que):
        """Process URLs from *que* until it is empty.

        Args:
            que: A ``queue.Queue`` of URL strings. URLs that produce no
                matches are re-queued on this same queue.

        Returns:
            None. The URL and the list of extracted matches are printed.
        """
        while True:
            # get_nowait() instead of `empty()` + blocking `get()`: another
            # worker may take the last item between the check and the get,
            # which would leave this thread blocked forever.
            try:
                url = que.get_nowait()
            except queue.Empty:
                return
            print(url)

            options = webdriver.ChromeOptions()
            options.binary_location = r"C:\....\chrome.exe"
            driver = webdriver.Chrome(options=options)
            try:
                driver.get(url)
                time.sleep(self.PAGE_LOAD_WAIT)
                content = driver.page_source
            finally:
                # quit() ends the whole browser session even when loading
                # fails; close() only closes the current window.
                driver.quit()

            url_data = self.URL_PATTERN.findall(content)
            print(url_data)
            if not url_data:
                # NOTE(review): a page that never yields matches is retried
                # indefinitely -- consider adding a retry limit.
                que.put(url)

    def many_t(self, que=None):
        """Run ``max_thread`` workers over a queue and wait for all of them.

        Args:
            que: Queue of URLs to process. Defaults to the module-level
                queue ``q`` when omitted.
        """
        if que is None:
            que = q
        workers = [
            threading.Thread(target=self.getdata, args=(que,))
            for _ in range(self.max_thread)
        ]
        for worker in workers:
            worker.start()
        for worker in workers:
            worker.join()

    def main(self):
        """Entry point: run the threaded scrape over the default queue."""
        self.many_t()
def main():
    """Create a ``dragen`` scraper and run its ``main`` method."""
    scraper = dragen()
    scraper.main()
# Start the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
|