day02-get请求的两种方法
1、get请求quote方法
# urllib.parse.quote percent-encodes a string (as UTF-8 bytes by default) so
# that non-ASCII text such as Chinese characters can be embedded in a URL.
# Import urllib.parse explicitly: relying on urllib.request to pull it in as
# a side effect is an implementation detail.
import urllib.parse
import urllib.request

# Goal: fetch the page source of https://www.baidu.com/s?wd=周杰伦

# A custom request object carrying a browser-like User-Agent is the first
# line of defence against simple anti-scraping checks.
headers = {
    'user-agent': ' Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}

# Percent-encode the search keyword, then append it to the query string.
name = urllib.parse.quote("周杰伦")
url = 'https://www.baidu.com/s?wd=' + name

# Build the customised request object.
request = urllib.request.Request(url=url, headers=headers)

# Send the request as if from a browser; the `with` block guarantees the
# underlying connection is closed even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    # Read the raw response body and decode it to text.
    content = response.read().decode("utf8")

# Print the page source.
print(content)
2、get请求urlencode方法
# Import urllib.parse explicitly: relying on urllib.request to pull it in as
# a side effect is an implementation detail.
import urllib.parse
import urllib.request

# urllib.parse.urlencode is the tool for *several* query parameters: it
# percent-encodes every key and value of a mapping and joins them as
# "key=value" pairs separated by "&".
#
# Goal: fetch the page source of the URL built from the parameters below:
# https://www.baidu.com/s?wd=%E5%91%A8%E6%9D%B0%E4%BC%A6&sex=%E7%94%B7&location=%E4%B8%AD%E5%9B%BD-%E5%8F%B0%E6%B9%BE
data = {
    "wd": "周杰伦",
    "sex": "男",
    "location": "中国-台湾",
}
newData = urllib.parse.urlencode(data)

# The base URL must stop at "?": urlencode() already emits the "wd=" key, so
# a base ending in "?wd=" would produce the malformed query "?wd=wd=...".
baseUrl = 'https://www.baidu.com/s?'
url = baseUrl + newData

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/96.0.4664.110 Safari/537.36'
}

# Build the customised request object.
request = urllib.request.Request(url=url, headers=headers)

# Send the request as if from a browser; the `with` block guarantees the
# underlying connection is closed even if reading or decoding fails.
with urllib.request.urlopen(request) as response:
    # Read the raw response body and decode it to get the page source.
    content = response.read().decode("utf8")

print(content)
|