requests爬虫
一、基本使用
- 安装
pip install requests
- response的属性以及类型
- 基本使用
import requests
# Minimal example: fetch the Baidu home page with a plain GET request.
url = "http://www.baidu.com"
resp = requests.get(url=url)
# Decode the body as UTF-8 when resp.text is accessed.
resp.encoding = "utf-8"
- get请求
import requests
# GET request with query parameters: run a Baidu search and print the page.
url = "https://www.baidu.com/s?"
headers = {
    # Browser-like User-Agent sent with the request.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/97.0.4692.71 Safari/537.36"
    ),
}
# Passed as `params`, so requests URL-encodes it into the query string.
data = {"wd": "温州"}
resp = requests.get(url, params=data, headers=headers)
resp.encoding = "utf-8"
print(resp.text)
- post请求
import requests
import json
# POST request: send a keyword to fanyi.baidu.com/sug and print the parsed
# JSON reply.
url = "https://fanyi.baidu.com/sug"
headers = {
    # Browser-like User-Agent sent with the request.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/97.0.4692.71 Safari/537.36"
    ),
}
# Passed as `data`, so requests sends it form-encoded in the request body.
data = {"kw": "eye"}
resp = requests.post(url, data=data, headers=headers)
resp.encoding = "utf-8"
# The body is JSON text; decode it into Python objects before printing.
context = resp.text
print(json.loads(context))
- 代理
import requests
# Run a Baidu search through an HTTP proxy and save the result page to disk.
url = "https://www.baidu.com/s"
headers = {
    # Browser-like User-Agent sent with the request.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/97.0.4692.71 Safari/537.36"
    ),
}
data = {"wd": "ip"}
# requests selects the proxy by the scheme of the *target* URL. The request
# below goes to an https:// URL, so an "https" entry is required; with only an
# "http" entry the proxy would be silently bypassed. The proxy URLs carry an
# explicit scheme, as the requests documentation requires.
proxy = {
    "http": "http://120.220.220.95:8085",
    "https": "http://120.220.220.95:8085",
}
# requests has no default timeout; set one so an unreachable proxy raises an
# error instead of blocking indefinitely.
resp = requests.get(
    url=url,
    params=data,
    headers=headers,
    proxies=proxy,
    timeout=10,
)
resp.encoding = "utf-8"
context = resp.text
with open("代理.html", "w", encoding="utf-8") as fp:
    fp.write(context)
- 爬 古诗文网 登入
import requests
import lxml.etree
import urllib.request
# Log in to so.gushiwen.cn:
#   1. load the login page and read its hidden form fields,
#   2. download the captcha image so the user can read it,
#   3. post the login form and save the returned page.
import getpass

source_url = "https://so.gushiwen.cn/user/login.aspx?from=http://so.gushiwen.cn/user/collect.aspx"
headers = {
    # Browser-like User-Agent sent with every request of the session.
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/97.0.4692.71 Safari/537.36"
    ),
}

# One session serves all three requests, so cookies set while loading the
# login page and the captcha image are sent back with the login POST.
session = requests.Session()
session.headers.update(headers)

login_resp = session.get(url=source_url)
context = login_resp.text
tree = lxml.etree.HTML(context)

# xpath() returns a list of matches; the form needs the single string value.
__VIEWSTATE = tree.xpath("//input[@id='__VIEWSTATE']/@value")[0]
__VIEWSTATEGENERATOR = tree.xpath("//input[@id='__VIEWSTATEGENERATOR']/@value")[0]
# The captcha src attribute is site-relative, so prefix the host.
img_src = "https://so.gushiwen.cn" + tree.xpath('//img[@id="imgCode"]/@src')[0]

# Save the captcha to disk so the user can open it and type what it shows.
code_resp = session.get(img_src)
context_code = code_resp.content
with open("古诗文网验证码.png", "wb") as fp:
    fp.write(context_code)
img_code = input("请输入验证码")

# The form is posted back to the same URL the login page was loaded from.
login_url = source_url
login_data = {
    "__VIEWSTATE": __VIEWSTATE,
    "__VIEWSTATEGENERATOR": __VIEWSTATEGENERATOR,
    "from": "http://so.gushiwen.cn/user/collect.aspx",
    # Credentials are asked for at run time instead of being stored in the
    # file in plain text; getpass keeps the password off the screen.
    "email": input("请输入邮箱"),
    "pwd": getpass.getpass("请输入密码"),
    "code": img_code,
    "denglu": "登录",
}
resp = session.post(url=login_url, data=login_data)
with open("gsw.html", "w", encoding="utf-8") as fp:
    fp.write(resp.text)
|