# Images
# Scrape the images from a single listing page
from lxml import etree
import requests
import os
# Single-page scraper: downloads every thumbnail listed on the first
# "4kmeinv" page of pic.netbian.com into the ``zhaopian`` directory.


def sanitize_filename(name, replacement="_"):
    """Return *name* made safe to use as a single file name.

    Path separators, the characters Windows forbids in file names and ASCII
    control characters are replaced by *replacement*; leading/trailing spaces
    and dots are stripped.  If nothing is left, ``"untitled"`` is returned so
    the caller always gets a usable name.
    """
    forbidden = '\\/:*?"<>|'
    cleaned = "".join(
        replacement if ch in forbidden or ord(ch) < 32 else ch for ch in name
    )
    cleaned = cleaned.strip(" .")
    return cleaned or "untitled"


if __name__ == "__main__":
    save_dir = 'zhaopian'
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(save_dir, exist_ok=True)

    # Seconds to wait for the server; without it requests can block forever.
    request_timeout = 10
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }
    url = 'https://pic.netbian.com/4kmeinv/'

    response = requests.get(url=url, headers=headers, timeout=request_timeout)
    response.raise_for_status()
    # The page text is GBK (the previous code recovered titles by re-encoding
    # as ISO-8859-1 and decoding as GBK).  Declaring the encoding up front
    # yields correct text directly and cannot raise UnicodeEncodeError.
    response.encoding = 'gbk'
    tree = etree.HTML(response.text)

    li_list = tree.xpath('//div[@class="slist"]/ul/li')
    for li in li_list:
        src_parts = li.xpath('./a/img/@src')
        title_parts = li.xpath('./a/b/text()')
        # Some list items may lack an image or a caption; skip them instead
        # of failing with IndexError.
        if not src_parts or not title_parts:
            continue

        tupian_src = 'https://pic.netbian.com' + src_parts[0]
        name = sanitize_filename(title_parts[0]) + '.jpg'

        try:
            image_response = requests.get(
                url=tupian_src, headers=headers, timeout=request_timeout
            )
            # Do not store an HTML error page under a .jpg name.
            image_response.raise_for_status()
        except requests.RequestException as exc:
            print(f"skipped {tupian_src}: {exc}")
            continue

        path = os.path.join(save_dir, name)
        with open(path, 'wb') as fp:
            fp.write(image_response.content)
        print("下载成功!!!")
import os
import requests
from lxml import etree
# Single-page scraper (alternative version): downloads every thumbnail on the
# first "4kmeinv" listing page into ``you_knew_about_picture``, naming each
# file after the image's ``alt`` text.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.5211 SLBChan/25"
}
url = "https://pic.netbian.com/4kmeinv/"
site_root = "https://pic.netbian.com/"
save_dir = "you_knew_about_picture"
# Seconds to wait for the server; without it requests can block forever.
request_timeout = 10
# Path separators and characters Windows forbids in file names -> "_".
unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

listing = requests.get(url=url, headers=header, timeout=request_timeout)
listing.raise_for_status()
# The page is GBK; setting the encoding replaces the fragile
# encode("iso-8859-1").decode("gbk") round-trip on the whole document.
listing.encoding = "gbk"
girl_data = listing.text
girl_etree = etree.HTML(girl_data)

# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(save_dir, exist_ok=True)

# Walk the <li> nodes and read src/alt from the same item, so a missing
# attribute can never shift names onto the wrong picture.
for item in girl_etree.xpath("//ul[@class='clearfix']/li"):
    src_parts = item.xpath("./a/img/@src")
    alt_parts = item.xpath("./a/img/@alt")
    if not src_parts or not alt_parts:
        continue

    # src values are site-absolute ("/uploads/..."); strip the leading slash
    # so the joined URL does not contain "//".
    new_loc = site_root + src_parts[0].lstrip("/")
    try:
        picture = requests.get(new_loc, headers=header, timeout=request_timeout)
        # Do not store an HTML error page under a .jpg name.
        picture.raise_for_status()
    except requests.RequestException as exc:
        print(f"skipped {new_loc}: {exc}")
        continue

    each_picture_name = save_dir + "/" + alt_parts[0].translate(unsafe_chars) + ".jpg"
    # "with" closes the file even if the write fails.
    with open(each_picture_name, mode="wb") as fp:
        fp.write(picture.content)
    print(each_picture_name.split("/")[-1] + " have been over")
# Scrape images from multiple listing pages
import requests
from lxml import etree
import os
# Multi-page scraper: downloads every thumbnail listed on pages 2-4 of the
# "4kmeinv" section of pic.netbian.com into the ``zhao`` directory.


def sanitize_filename(name, replacement="_"):
    """Return *name* made safe to use as a single file name.

    Path separators, the characters Windows forbids in file names and ASCII
    control characters are replaced by *replacement*; leading/trailing spaces
    and dots are stripped.  If nothing is left, ``"untitled"`` is returned so
    the caller always gets a usable name.
    """
    forbidden = '\\/:*?"<>|'
    cleaned = "".join(
        replacement if ch in forbidden or ord(ch) < 32 else ch for ch in name
    )
    cleaned = cleaned.strip(" .")
    return cleaned or "untitled"


if __name__ == "__main__":
    save_dir = 'zhao'
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(save_dir, exist_ok=True)

    # Seconds to wait for the server; without it requests can block forever.
    request_timeout = 10
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36'
    }

    for i in range(2, 5):
        url = f'https://pic.netbian.com/4kmeinv/index_{i}.html'
        try:
            page = requests.get(url=url, headers=headers, timeout=request_timeout)
            page.raise_for_status()
        except requests.RequestException as exc:
            # One unreachable listing page should not abort the other pages.
            print(f"skipped page {url}: {exc}")
            continue
        # The page text is GBK (the previous code recovered titles by
        # re-encoding as ISO-8859-1 and decoding as GBK); declare it up front.
        page.encoding = 'gbk'
        tree = etree.HTML(page.text)

        li_list = tree.xpath('//*[@id="main"]/div[3]/ul/li')
        for li in li_list:
            src_parts = li.xpath('./a/img/@src')
            title_parts = li.xpath('./a/b/text()')
            # Skip list items without an image or caption instead of
            # failing with IndexError.
            if not src_parts or not title_parts:
                continue

            src_url = 'https://pic.netbian.com' + src_parts[0]
            name = sanitize_filename(title_parts[0]) + '.jpg'

            try:
                image_response = requests.get(
                    url=src_url, headers=headers, timeout=request_timeout
                )
                # Do not store an HTML error page under a .jpg name.
                image_response.raise_for_status()
            except requests.RequestException as exc:
                print(f"skipped {src_url}: {exc}")
                continue

            path = os.path.join(save_dir, name)
            with open(path, 'wb') as fp:
                fp.write(image_response.content)
            print('下载成功!!!')
# Someone else's implementation
import os
import requests
from lxml import etree
# Multi-page scraper (alternative version): collects the thumbnails listed on
# pages 2-3 of the "4kmeinv" section, then saves them into
# ``you_knew_about_picture`` as "<counter> . <title>.jpg" and finally prints
# how many pictures were saved.
header = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.89 Safari/537.36 SLBrowser/7.0.0.5211 SLBChan/25"
}
site_root = "https://pic.netbian.com/"
save_dir = "you_knew_about_picture"
# Seconds to wait for the server; without it requests can block forever.
request_timeout = 10
# Path separators and characters Windows forbids in file names -> "_".
unsafe_chars = str.maketrans({ch: "_" for ch in '\\/:*?"<>|'})

picture_loc = []
picture_name_list = []
for i in range(2, 4):
    url = "https://pic.netbian.com/4kmeinv/index_{0}.html".format(i)
    try:
        listing = requests.get(url=url, headers=header, timeout=request_timeout)
        listing.raise_for_status()
    except requests.RequestException as exc:
        # One unreachable listing page should not abort the other pages.
        print(f"skipped page {url}: {exc}")
        continue
    # The page is GBK; setting the encoding replaces the fragile
    # encode("iso-8859-1").decode("gbk") round-trip on the whole document.
    listing.encoding = "gbk"
    girl_etree = etree.HTML(listing.text)

    # Read src and caption from the same <a> node and append them together,
    # so the two lists always stay the same length and aligned.
    for anchor in girl_etree.xpath("//ul[@class='clearfix']/li/a"):
        src_parts = anchor.xpath("./img/@src")
        title_parts = anchor.xpath("./b/text()")
        if src_parts and title_parts:
            picture_loc.append(src_parts[0])
            picture_name_list.append(title_parts[0])

# exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
os.makedirs(save_dir, exist_ok=True)

# Number of pictures saved so far; also used as the file-name prefix.
a = 0
for each_loc, each_title in zip(picture_loc, picture_name_list):
    # src values are site-absolute ("/uploads/..."); strip the leading slash
    # so the joined URL does not contain "//".
    new_loc = site_root + each_loc.lstrip("/")
    try:
        picture = requests.get(new_loc, headers=header, timeout=request_timeout)
        # Do not store an HTML error page under a .jpg name.
        picture.raise_for_status()
    except requests.RequestException as exc:
        print(f"skipped {new_loc}: {exc}")
        continue

    each_picture_name = (
        save_dir + "/" + str(a) + " . " + each_title.translate(unsafe_chars) + ".jpg"
    )
    # "with" closes the file even if the write fails.
    with open(each_picture_name, mode="wb") as fp:
        fp.write(picture.content)
    print(each_picture_name.split("/")[-1] + " have been over")
    a += 1
print(a)
|