练练手,爬取某电商网站上的商品图片,还真爬到了。代码如下:
4.28
# Script: open a 1688 product page in Chrome and download every image found
# inside the "content-detail" element into the local "imagess" folder.
import os
import time
import urllib
from urllib import request

from selenium import webdriver

# Folder that receives the downloaded images (created on demand below).
IMAGE_DIR = 'imagess'

option = webdriver.ChromeOptions()
option.binary_location = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
option.add_argument("--disable-blink-features=AutomationControlled")
# Uncomment to run Chrome without a visible window:
# option.add_argument('headless')
driver = webdriver.Chrome(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe', options=option)
url = 'https://detail.1688.com/offer/669667221688.html?spm=a26352.13672862.offerlist.227.34b51e62ZfC32f'

try:
    driver.get(url)
    # Fixed pause to give the page time to render before querying the DOM.
    time.sleep(1)
    parent = driver.find_element_by_class_name('content-detail')
    images = parent.find_elements_by_tag_name('img')

    # urlretrieve does not create missing folders, so make sure it exists.
    os.makedirs(IMAGE_DIR, exist_ok=True)

    for image in images:
        # The image address is read from the "data-lazyload-src" attribute.
        img_url = image.get_attribute("data-lazyload-src")
        if not img_url:
            # get_attribute returns None when the attribute is absent;
            # skip such <img> tags instead of crashing on .split().
            continue
        # File name = last path segment of the URL, without any query string.
        name = img_url.split('/')[-1].split('?')[0]
        if not name:
            continue
        # Save the image into the imagess folder.
        request.urlretrieve(img_url, f'{IMAGE_DIR}/{name}')
finally:
    # Always close the browser, even if a lookup or download failed.
    driver.quit()
完善了一下,现在可以把抓取到的图片自动保存到新建的产品目录下,每个产品一个目录:
4.29
from selenium import webdriver
import time
def mkdir(path):
    """Create the directory ``path`` (including missing parents) if absent.

    Leading/trailing whitespace and any trailing backslashes are removed
    from ``path`` before it is used. A status message is printed either way.

    Args:
        path: Directory path to create.

    Returns:
        True if the directory was created by this call, False if something
        already exists at that path.
    """
    # Imported locally so the function stays self-contained.
    import os

    # Drop surrounding whitespace, then any trailing "\" characters.
    path = path.strip()
    path = path.rstrip("\\")

    # Attempt the creation directly instead of checking os.path.exists()
    # first: this avoids a race where the directory appears between the
    # check and the creation.
    try:
        os.makedirs(path)
    except FileExistsError:
        # Something is already there: report it and create nothing.
        print(path + ' 目录已存在')
        return False

    print(path + ' 创建成功')
    return True
# Script: open a 1688 product page, then save its detail images and its
# gallery ("head") images into a per-product folder under "imagess".
from urllib import request
import urllib


def _save_images(elements, attribute, target_dir):
    """Download the image referenced by ``attribute`` on each element into ``target_dir``."""
    for element in elements:
        img_url = element.get_attribute(attribute)
        if not img_url:
            # get_attribute returns None when the attribute is absent;
            # skip such elements instead of crashing on .split().
            continue
        print(img_url)
        # File name = last path segment of the URL, without any query string.
        name = img_url.split('/')[-1].split('?')[0]
        if not name:
            continue
        print(name)
        request.urlretrieve(img_url, f'{target_dir}/{name}')


option = webdriver.ChromeOptions()
option.binary_location = r'C:\Program Files\Google\Chrome\Application\chrome.exe'
option.add_argument("--disable-blink-features=AutomationControlled")
# Uncomment to run Chrome without a visible window:
# option.add_argument('headless')
driver = webdriver.Chrome(executable_path=r'C:\Program Files\Google\Chrome\Application\chromedriver.exe', options=option)
url = 'https://detail.1688.com/offer/669667221688.html?spm=a26352.13672862.offerlist.227.34b51e62ZfC32f'

try:
    driver.get(url)
    # Fixed pause to give the page time to render before querying the DOM.
    time.sleep(1)
    parent = driver.find_element_by_class_name('content-detail')
    images = parent.find_elements_by_tag_name('img')
    title = driver.find_element_by_class_name('title-first-column')
    title_text = title.find_element_by_class_name("title-text")

    # The product folder is named after the last 10 characters of the title.
    # Read the title once, and replace characters that are path separators
    # or illegal in Windows file names so folder creation cannot fail on them.
    raw_title = title_text.get_attribute('innerText') or ''
    illegal_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})
    product_name = raw_title[-10:].translate(illegal_chars).strip() or 'untitled'

    detail_dir = 'imagess/' + product_name + '/details'
    mkdir(detail_dir)
    headimg_dir = 'imagess/' + product_name + '/headimgs'
    mkdir(headimg_dir)

    # Detail images keep their address in the "data-lazyload-src" attribute.
    _save_images(images, "data-lazyload-src", detail_dir)

    # Gallery images carry their address in the regular "src" attribute.
    detail_gallery_imgs = driver.find_elements_by_class_name('detail-gallery-img')
    _save_images(detail_gallery_imgs, "src", headimg_dir)
finally:
    # Always close the browser, even if a lookup or download failed.
    driver.quit()
|