Preface
Partly based on these reference blogs and videos:
https://blog.csdn.net/m0_50944918/article/details/112135031
https://blog.csdn.net/hjxu2016/article/details/77833336
https://www.bilibili.com/video/BV1kT4y1L7e5?from=search&seid=1953780930962809824&spm_id_from=333.337.0.0
Rant
Over the past couple of days I wanted to try cracking the jigsaw-slider type of captcha, and after some searching I found that many companies use GeeTest (极验). I browsed a number of blog posts and learned several approaches from them, but they all felt fairly complicated to me. I then tried cv2's grayscale + binarization myself and found it can easily pick out the edges of the puzzle piece, so the edge-detection part here is my own; it gives the distance the slider button has to be dragged. The drag operations below (drag acceleration and so on) mainly reference other people's code (^_?)☆ Thanks to @我不是秃头sheep for the help.
Code
1. Import the libraries
import random
import time
import cv2
import numpy as np
from selenium import webdriver
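One version note before we start: cv2.findContours returns (contours, hierarchy) on OpenCV 4.x but (image, contours, hierarchy) on 3.x, and the unpacking in the edge-detection step below assumes 4.x. If you are not sure which version you have, a small defensive wrapper like this (my own sketch, not part of the original code) works on either:
import cv2

def find_contours_compat(binary_img):
    # OpenCV 3.x returns 3 values, 4.x returns 2; hide the difference here
    ret = cv2.findContours(binary_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    contours, hierarchy = ret if len(ret) == 2 else ret[1:]
    return contours, hierarchy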
2. Initialize the browser driver and set the global variable btn
options = webdriver.ChromeOptions()
options.add_argument("--auto-open-devtools-for-tabs")
driver = webdriver.Chrome(chrome_options=options)
btn = None
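The snippets in this post use the Selenium 3 style API (chrome_options=, find_element_by_xpath). If you are on Selenium 4, the chrome_options keyword and the find_element_by_* helpers were removed; a rough equivalent of the setup above under the newer API would look like this (just a sketch, not what the rest of the post uses):
from selenium import webdriver
from selenium.webdriver.common.by import By

options = webdriver.ChromeOptions()
options.add_argument("--auto-open-devtools-for-tabs")
driver = webdriver.Chrome(options=options)  # Selenium 4: keyword is options, not chrome_options
btn = None

# Element lookups then become, for example:
# driver.find_element(By.XPATH, '//*[@id="gt-register-mobile"]/div/div[2]/div[1]/div[2]/div/div[3]/div[1]/input')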
3. Write the crawler and screenshot the page element
def get_pic(self):
    # Open the GeeTest registration demo page and trigger the slider captcha
    url = "https://www.geetest.com/Register?email="
    self.driver.get(url)
    self.driver.maximize_window()
    self.driver.implicitly_wait(10)
    # Type a dummy value into the form input, then click to bring up the captcha
    self.driver.find_element_by_xpath(
        '//*[@id="gt-register-mobile"]/div/div[2]/div[1]/div[2]/div/div[3]/div[1]/input').send_keys("1")
    self.driver.find_element_by_xpath(
        '//*[@id="gt-register-mobile"]/div/div[2]/div[1]/div[2]/div/div[3]/div[2]/div[1]/div').click()
    # Screenshot the captcha canvas so cv2 can analyse it
    ele = self.driver.find_element_by_xpath(
        '/html/body/div[3]/div[2]/div[6]/div/div[1]/div[1]/div/a/div[1]/div/canvas[2]')
    time.sleep(0.5)
    ele.screenshot('ele.png')
    # Keep a handle to the slider button for the drag step later
    global btn
    btn = self.driver.find_element_by_xpath("/html/body/div[3]/div[2]/div[6]/div/div[1]/div[2]/div[2]")
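The fixed time.sleep(0.5) can occasionally screenshot the canvas before it has rendered. A slightly more robust variant is an explicit wait on the canvas before taking the screenshot; a sketch under that assumption (shown with the global driver, inside get_pic you would use self.driver, and the 10-second timeout is an arbitrary choice):
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

canvas_xpath = '/html/body/div[3]/div[2]/div[6]/div/div[1]/div[1]/div/a/div[1]/div/canvas[2]'
# Wait until the captcha canvas is actually visible instead of sleeping a fixed 0.5 s
ele = WebDriverWait(driver, 10).until(
    EC.visibility_of_element_located((By.XPATH, canvas_xpath))
)
ele.screenshot('ele.png')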
4. cv2 edge detection to get the horizontal offset
def get_targetOffsetX(self):
    """Edge detection on the captcha screenshot.
    Load ele.png in grayscale, binarize it with an inverse threshold, cut off the
    left strip (the first 55 px, where the slider piece sits), find the outer
    contours of the remaining area and take the largest x coordinate as the
    distance the slider has to travel.
    :return: drag distance along the x axis, in pixels"""
    img = cv2.imread("ele.png", 0)           # read the screenshot as grayscale
    _, th = cv2.threshold(img, 80, 255, 1)   # flag 1 == cv2.THRESH_BINARY_INV
    th1 = th[0:th.shape[0], 0:55]            # left strip with the slider piece (not used further)
    th2 = th[0:th.shape[0], 55:th.shape[1]]  # rest of the image, containing the gap
    contours2, hierarchy2 = cv2.findContours(th2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # Flatten every contour into an (N, 2) array of points and stack them together
    temps = list(map(lambda x: np.array(x).reshape(np.array(x).shape[0], 2), contours2))
    result = np.concatenate(tuple(temps), axis=0)
    x1 = list(map(lambda x: x[0], result))
    return int(max(x1))
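The threshold of 80 was picked by eye and will not suit every background image. When the returned offset looks wrong, it helps to dump the intermediate binarized image and the detected contours and tune the threshold by inspection. A small debugging helper along those lines (the file names and the helper itself are my own addition):
import cv2

def debug_contours(path="ele.png", thresh=80):
    # Save the binarized image and a contour overlay so the threshold can be tuned
    img = cv2.imread(path, 0)
    _, th = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY_INV)   # same as flag 1 above
    th2 = th[:, 55:]                                                 # same crop as get_targetOffsetX
    contours, _ = cv2.findContours(th2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    overlay = cv2.cvtColor(th2, cv2.COLOR_GRAY2BGR)
    cv2.drawContours(overlay, contours, -1, (0, 0, 255), 2)          # contours drawn in red
    cv2.imwrite("debug_binary.png", th)
    cv2.imwrite("debug_contours.png", overlay)
    for c in contours:
        x, y, w, h = cv2.boundingRect(c)
        print("contour at x=%d, width=%d" % (x + 55, w))             # +55 restores full-image coords

debug_contours()  # run after get_pic() has saved ele.png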
5. Dragging (ActionChains)
def simulateDragX(self, source, targetOffsetX):
    """Mimic a human drag along the x axis: drag quickly with a deliberate error,
    pause, then correct the error in steps. This helps avoid being flagged as a
    bot ("the image was eaten by the monster" failure).
    :param source: the HTML element to drag
    :param targetOffsetX: target drag distance on the x axis
    :return: None"""
    action_chains = webdriver.ActionChains(self.driver)
    action_chains.click_and_hold(source)
    # First move: overshoot or undershoot the target by a few pixels
    sumOffsetx = random.randint(-5, 5)
    action_chains.move_by_offset(targetOffsetX + sumOffsetx, 0)
    action_chains.pause(self.__getRadomPauseScondes())
    # Correct part of the error, pause, then correct the rest
    fixedOffsetX = 0
    if sumOffsetx < 0:
        offsetx = random.randint(sumOffsetx, 0)
    else:
        offsetx = random.randint(0, sumOffsetx)
    fixedOffsetX = fixedOffsetX + offsetx
    action_chains.move_by_offset(-offsetx, 0)
    action_chains.pause(self.__getRadomPauseScondes())
    action_chains.move_by_offset(-sumOffsetx + fixedOffsetX, 0)
    action_chains.pause(self.__getRadomPauseScondes())
    action_chains.release()
    action_chains.perform()
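It may not be obvious at first glance, but the three move_by_offset calls always add up to exactly targetOffsetX: fixedOffsetX ends up equal to offsetx, so (targetOffsetX + sumOffsetx) - offsetx + (-sumOffsetx + offsetx) = targetOffsetX. A tiny standalone check of that bookkeeping, with the Selenium parts stripped out:
import random

def total_drag(targetOffsetX):
    # Reproduce only the offset arithmetic from simulateDragX, without Selenium
    sumOffsetx = random.randint(-5, 5)
    offsetx = random.randint(sumOffsetx, 0) if sumOffsetx < 0 else random.randint(0, sumOffsetx)
    fixedOffsetX = offsetx
    return (targetOffsetX + sumOffsetx) + (-offsetx) + (-sumOffsetx + fixedOffsetX)

assert all(total_drag(120) == 120 for _ in range(1000))  # the moves always sum to the target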
def simpleSimulateDragX(self, source, targetOffsetX):
    """
    Simple human-like drag: move quickly along the x axis straight to the target
    position, pause for a moment, then release.
    Bilibili appears to tell humans from bots by whether there is a pause, so this
    method works there.
    :param source: the HTML element to drag
    :param targetOffsetX: target drag distance on the x axis
    :return: None
    """
    action_chains = webdriver.ActionChains(self.driver)
    action_chains.click_and_hold(source)
    action_chains.pause(0.2)
    action_chains.move_by_offset(targetOffsetX, 0)
    action_chains.pause(0.6)
    action_chains.release()
    action_chains.perform()
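If the target site, like Bilibili in my experience, only cares about whether there is a pause, you can swap simpleSimulateDragX into the main block of the full code below in place of simulateDragX, for example:
# Same flow as the main block in the full code, but with the simpler drag
util = DragUtil(driver)
util.get_pic()
util.simpleSimulateDragX(btn, util.get_targetOffsetX())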
Full code
import random
import time

import cv2
import numpy as np
from selenium import webdriver

options = webdriver.ChromeOptions()
options.add_argument("--auto-open-devtools-for-tabs")
driver = webdriver.Chrome(chrome_options=options)
btn = None


class DragUtil():
    def __init__(self, driver):
        self.driver = driver

    def get_pic(self):
        url = "https://www.geetest.com/Register?email="
        self.driver.get(url)
        self.driver.maximize_window()
        self.driver.implicitly_wait(10)
        self.driver.find_element_by_xpath(
            '//*[@id="gt-register-mobile"]/div/div[2]/div[1]/div[2]/div/div[3]/div[1]/input').send_keys("1")
        self.driver.find_element_by_xpath(
            '//*[@id="gt-register-mobile"]/div/div[2]/div[1]/div[2]/div/div[3]/div[2]/div[1]/div').click()
        ele = self.driver.find_element_by_xpath(
            '/html/body/div[3]/div[2]/div[6]/div/div[1]/div[1]/div/a/div[1]/div/canvas[2]')
        time.sleep(0.5)
        ele.screenshot('ele.png')
        global btn
        btn = self.driver.find_element_by_xpath("/html/body/div[3]/div[2]/div[6]/div/div[1]/div[2]/div[2]")

    def __getRadomPauseScondes(self):
        """
        :return: a random pause time for the drag
        """
        return random.uniform(0.6, 0.9)

    def get_targetOffsetX(self):
        """Edge detection on the captcha screenshot: binarize it, cut off the left
        55 px strip, find the outer contours of the rest and return the largest
        x coordinate as the drag distance.
        :return: drag distance along the x axis, in pixels"""
        img = cv2.imread("ele.png", 0)
        _, th = cv2.threshold(img, 80, 255, 1)
        th1 = th[0:th.shape[0], 0:55]
        th2 = th[0:th.shape[0], 55:th.shape[1]]
        contours2, hierarchy2 = cv2.findContours(th2, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
        temps = list(map(lambda x: np.array(x).reshape(np.array(x).shape[0], 2), contours2))
        result = np.concatenate(tuple(temps), axis=0)
        x1 = list(map(lambda x: x[0], result))
        return int(max(x1))

    def simulateDragX(self, source, targetOffsetX):
        """Mimic a human drag along the x axis: drag quickly with a deliberate error,
        pause, then correct the error, to avoid being flagged as a bot.
        :param source: the HTML element to drag
        :param targetOffsetX: target drag distance on the x axis
        :return: None"""
        action_chains = webdriver.ActionChains(self.driver)
        action_chains.click_and_hold(source)
        sumOffsetx = random.randint(-5, 5)
        action_chains.move_by_offset(targetOffsetX + sumOffsetx, 0)
        action_chains.pause(self.__getRadomPauseScondes())
        fixedOffsetX = 0
        if sumOffsetx < 0:
            offsetx = random.randint(sumOffsetx, 0)
        else:
            offsetx = random.randint(0, sumOffsetx)
        fixedOffsetX = fixedOffsetX + offsetx
        action_chains.move_by_offset(-offsetx, 0)
        action_chains.pause(self.__getRadomPauseScondes())
        action_chains.move_by_offset(-sumOffsetx + fixedOffsetX, 0)
        action_chains.pause(self.__getRadomPauseScondes())
        action_chains.release()
        action_chains.perform()

    def simpleSimulateDragX(self, source, targetOffsetX):
        """
        Simple human-like drag: move quickly along the x axis straight to the target
        position, pause for a moment, then release.
        Bilibili appears to tell humans from bots by whether there is a pause, so this
        method works there.
        :param source: the HTML element to drag
        :param targetOffsetX: target drag distance on the x axis
        :return: None
        """
        action_chains = webdriver.ActionChains(self.driver)
        action_chains.click_and_hold(source)
        action_chains.pause(0.2)
        action_chains.move_by_offset(targetOffsetX, 0)
        action_chains.pause(0.6)
        action_chains.release()
        action_chains.perform()


if __name__ == "__main__":
    _driver = DragUtil(driver)
    _driver.get_pic()
    _driver.simulateDragX(btn, _driver.get_targetOffsetX())
Summary
The success rate so far is decent, and the code can still be improved: for example, nudging the offset up or down within a small error range after a failed attempt, or tuning the cv2 binarization threshold. With those refinements the code would be more reliable. There are also plenty of other approaches, such as feature matching; if I get the chance to learn them I'll build them and share with everyone (i.e. myself). o(′^`)o
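As a concrete sketch of the first improvement mentioned above, a retry loop could re-run the drag with the offset nudged within a small error range whenever verification fails. The check_success helper here is a hypothetical placeholder; detecting success depends on the actual page (for example, looking for GeeTest's success panel element):
import random
import time

def check_success(driver):
    # Hypothetical placeholder: inspect the page for the GeeTest success element.
    # The real selector depends on the page, so this stub always reports failure.
    return False

def solve_with_retries(util, max_attempts=5):
    # Retry the slider, nudging the offset within a small error range each time
    for attempt in range(max_attempts):
        util.get_pic()
        offset = util.get_targetOffsetX() + random.randint(-3, 3)  # nudge within an error range
        util.simulateDragX(btn, offset)
        if check_success(util.driver):
            return True
        time.sleep(2)  # give the widget time to reset before the next attempt
    return False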