import pandas as pd
import threading
import requests
from bs4 import BeautifulSoup
from time import sleep
from datetime import datetime
# Load the spreadsheet and take its URL column as the list of sites to visit.
df = pd.read_excel("网站对应名字.xlsx")
sites = df.URL
data_count = len(sites)

# Worker configuration: thread i handles indices i, i + thread_count, ...
thread_count = 16
n_loops = range(thread_count)
threads = list()

# One result slot per site, filled in place by the worker threads.
names = [None for _ in range(data_count)]
def get_url_title(site):
    """Return the text of the ``<title>`` element of the page at *site*.

    Args:
        site: URL to fetch with an HTTP GET request.

    Returns:
        The title text, or the placeholder ``"网址有误"`` (Chinese for
        "the URL is wrong") when the page cannot be fetched or parsed, or
        when it has no ``<title>`` element.
    """
    try:
        # requests waits indefinitely by default; a timeout keeps a single
        # unresponsive host from blocking its worker thread forever.
        html = requests.get(site, timeout=10)
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed (and to avoid bs4's
        # "no parser was explicitly specified" warning).
        soup = BeautifulSoup(html.content, "html.parser")
        title_tag = soup.find("title")
        if title_tag is None:
            # Page fetched fine but carries no <title>; same placeholder as
            # before, just without relying on an AttributeError.
            return "网址有误"
        return title_tag.text
    except Exception:
        # Exception rather than BaseException, so KeyboardInterrupt and
        # SystemExit are not silently turned into a placeholder title.
        return "网址有误"
def write_title(start):
    """Fill every ``thread_count``-th slot of ``names``, beginning at *start*.

    For each index visited, the title of ``sites[index]`` is looked up with
    ``get_url_title``, stored in ``names[index]`` and printed together with
    the index.

    Args:
        start: First index to process; later indices are reached by
            stepping ``thread_count`` at a time up to ``data_count``.
    """
    # The module-level names are only read or mutated in place, never
    # rebound, so no ``global`` declaration is required.
    for index in range(start, data_count, thread_count):
        title = get_url_title(sites[index])
        names[index] = title
        print(index, title)
def main():
    """Run ``write_title`` on one thread per entry of ``n_loops`` and wait.

    Each thread receives its loop value as the ``start`` argument. The new
    threads are also appended to the module-level ``threads`` list, as
    before, so they remain available for inspection afterwards.
    """
    workers = [
        threading.Thread(target=write_title, args=(i,)) for i in n_loops
    ]
    threads.extend(workers)

    # Start and join only the threads created by this call. Indexing into
    # the shared ``threads`` list would, on a second call, hit the threads
    # from the first call again, and Thread.start() raises RuntimeError when
    # invoked more than once on the same object.
    for worker in workers:
        worker.start()
    for worker in workers:
        worker.join()
if __name__ == '__main__':
    main()

# Inspection expressions left over from an interactive session. As bare
# expression statements they display nothing when the file runs as a script.
names
len(names)
# NOTE(review): this only references the bound method without calling it;
# ``df.info()`` was probably intended -- confirm before changing.
df.info

# Report how many CPUs the machine exposes.
import multiprocessing
print(multiprocessing.cpu_count())