import os

import requests
class Tieba:
    """Download listing pages from tieba.baidu.com for a user-supplied keyword.

    On construction the user is prompted for the keyword and for the first
    and last page numbers (both inclusive). ``main`` then requests each page
    and stores the returned HTML as one file per page under ``SAVE_DIR``.
    """

    # Fixed listing endpoint. The keyword is sent as the ``kw`` query
    # parameter, so it must not also be concatenated onto the path.
    BASE_URL = 'https://tieba.baidu.com/f'
    # Directory (relative to the working directory) that receives the pages.
    SAVE_DIR = '第二天/tieba/'
    # Step of the ``pn`` query parameter between two consecutive pages.
    PAGE_SIZE = 50
    # Seconds to wait for the server; requests waits forever without one.
    TIMEOUT = 10

    def __init__(self):
        """Prompt for the keyword and page range and prepare the request.

        Raises:
            ValueError: if a page number entered by the user is not an
                integer.
        """
        self.tieba_name = input('请输入关键词:')
        self.st_page = int(input('请输入爬取的开始页'))
        self.end_page = int(input('请输入爬取的结束页'))
        self.url = self.BASE_URL
        self.headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/70.0.3538.25 Safari/537.36 Core/1.70.3875.400 QQBrowser/10.8.4492.400'}

    def send(self, params):
        """Send a GET request and return the response body as text.

        Args:
            params: mapping of query-string parameters for the request.

        Returns:
            The response body decoded with ``bytes.decode()`` defaults
            (UTF-8).

        Raises:
            requests.exceptions.RequestException: on connection failure or
                when the server does not answer within ``TIMEOUT`` seconds.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            params=params,
            timeout=self.TIMEOUT,
        )
        return response.content.decode()

    def save(self, data, pn):
        """Write one downloaded page to ``SAVE_DIR``.

        Args:
            data: page content to write.
            pn: page number; it becomes part of the file name.
        """
        # Create the target directory on first use so that open() does not
        # fail with FileNotFoundError on a fresh checkout.
        os.makedirs(self.SAVE_DIR, exist_ok=True)
        file_name = self.tieba_name + str(pn) + '页.html'
        file_path = os.path.join(self.SAVE_DIR, file_name)
        with open(file_path, 'w', encoding='utf-8') as f:
            # Report the page number; the message template is "page N".
            print('正在下载第%s页' % pn)
            f.write(data)

    def main(self):
        """Download and save every page from ``st_page`` to ``end_page``."""
        for pn in range(self.st_page, self.end_page + 1):
            params = {
                'kw': self.tieba_name,
                # Page numbers are 1-based while ``pn`` starts at 0 and
                # advances by PAGE_SIZE per page.
                'pn': (pn - 1) * self.PAGE_SIZE,
            }
            data = self.send(params)
            # The page number is passed on so save() can name the file.
            self.save(data, pn)
if __name__ == '__main__':
    # Script entry point: build the downloader (which prompts the user for
    # its settings) and run it.
    spider = Tieba()
    spider.main()
|