本案例只为学习,不为其他!
在这里插入代码片
```python
import re
import threading
import requests
import os
import openpyxl
from fake_useragent import UserAgent
# Generator of browser User-Agent strings from fake_useragent.
ua = UserAgent()
# Request headers used for every page fetch. The value must come from the
# generator (ua.random); the quoted string 'ua' would be sent to the server
# literally as the User-Agent.
headers = {'User-Agent': ua.random}
# Rows collected so far across all get_page calls; each row is a list of
# [title, url, author, popularity, description].
lst = []
# Patterns are compiled once at import time rather than once per page/item.
# The item pattern is a raw string so "\s" is a regex escape, not an invalid
# string escape.
_ITEM_RE = re.compile(r'<li\sclass="ret-search-item clearfix">[\s\S]+?</li>')
_LINK_RE = re.compile('<a class="mod-cover-list-thumb mod-cover-effect ui-db" title="(.*?)" href="(.*?)" target="_blank">')
_AUTHOR_RE = re.compile('<p class="ret-works-author" title="(.*?)">.*?</p>')
_POPULARITY_RE = re.compile('<span>人气:<em>(.*?)</em></span>')
_DESCRIPTION_RE = re.compile('<p class="ret-works-decs">(.*?)</p>')

# get_page runs as a thread target in this script; serialise the workbook
# rewrite so two threads never write the same .xlsx file at the same time.
_SAVE_LOCK = threading.Lock()


def _first_match(pattern, text, default='None'):
    """Return group 1 of the first match of pattern in text, else default."""
    found = pattern.search(text)
    return found.group(1) if found else default


def _parse_item(item_html):
    """Build one row from a single result <li> fragment.

    Returns [title, url, author, popularity, description], or None when the
    fragment has no title link. Any other missing field becomes the string
    'None'.
    """
    link = _LINK_RE.search(item_html)
    if link is None:
        return None
    title, href = link.groups()
    return [
        title,
        'https://ac.qq.com/' + href,
        _first_match(_AUTHOR_RE, item_html),
        _first_match(_POPULARITY_RE, item_html),
        # The whole placeholder 'None' is stored when there is no
        # description, not just its first character.
        _first_match(_DESCRIPTION_RE, item_html),
    ]


def get_page(n):
    """Fetch listing page n, append its rows to lst, then rewrite the workbook.

    Args:
        n: Page number substituted into the listing URL.

    Side effects:
        Appends one row per parsed item to the module-level list lst and
        calls save_xls(lst) once after the page has been processed.

    Raises:
        Whatever requests.get raises for network failures or timeouts.
    """
    url = f'https://ac.qq.com/Comic/index/page/{n}'
    # A timeout keeps a stalled connection from blocking this worker forever.
    resp = requests.get(url, headers=headers, timeout=10).text
    for item_html in _ITEM_RE.findall(resp):
        row = _parse_item(item_html)
        if row is not None:
            lst.append(row)
    with _SAVE_LOCK:
        save_xls(lst)
def save_pic():
    """Placeholder for saving cover images; not implemented yet.

    Returns:
        None. The function currently does nothing.
    """
    return None
def save_xls(lst):
    """Write all rows to a fresh workbook and save it as '腾讯动漫数据.xlsx'.

    Args:
        lst: Iterable of rows; each row is a sequence whose first element is
            the title (it is echoed to stdout) and whose items fill the
            columns named in the header row.

    Side effects:
        Prints one progress line per row and overwrites the output file in
        the current working directory.
    """
    wb = openpyxl.Workbook()
    sheet = wb.active
    header = ['名称', '链接', '作者', '人气', '描述']
    sheet.append(header)
    # Work on a snapshot: the list passed in is shared with worker threads
    # that may keep appending while this write is in progress.
    for row in list(lst):
        sheet.append(row)
        print(row[0], '存储完毕!')
    wb.save('腾讯动漫数据.xlsx')
if __name__ == '__main__':
    # At most 5 pages are fetched at the same time. The semaphore has to be
    # acquired by each worker; creating it alone limits nothing.
    s = threading.BoundedSemaphore(5)

    def _limited_get_page(n):
        """Run get_page(n) while holding one of the semaphore's slots."""
        with s:
            get_page(n)

    workers = []
    for n in range(1, 461):
        t = threading.Thread(target=_limited_get_page, args=(n,))
        t.start()
        workers.append(t)
    # Wait for every page so the script ends only after all work is done.
    for t in workers:
        t.join()
```

![在这里插入图片描述](https://img-blog.csdnimg.cn/2ee461bebe5849f7a50743bd61286564.png?x-oss-process=image/watermark,type_d3F5LXplbmhlaQ,shadow_50,text_Q1NETiBA5b6Q5rWq6ICB5biI,size_20,color_FFFFFF,t_70,g_se,x_16)
|