在初步学习selenium的定位方法之后,我们就可以找些网站来测试了,这次我选择的网站是豆豆玩
测试目的
- 点掉首页弹窗
- 模拟登录,含简单的验证码识别
- 表格提交
- 获取15期结果,存进MySQL数据库
思路与主要代码
去JS弹窗
这个容易,获取CSS标签,点掉即可:
# Start Chrome through a local chromedriver, open the site and dismiss the
# home-page popup.
# The path is a raw string: as a normal literal its backslashes (\P, \G, \C,
# \A, \c) are invalid escape sequences, which newer Python versions warn about.
# The resulting path value is unchanged.
s = Service(r"C:\Program Files (x86)\Google\Chrome\Application\chromedriver.exe")
driver = webdriver.Chrome(options=chrome_options, service=s)
ddwURL = "http://www.doudouwan.net/"
driver.get(url=ddwURL)
# Fixed pause before looking the popup button up.
time.sleep(6)
# Click the first element carrying the popup button's class name.
driver.find_elements(by=By.CLASS_NAME, value='layui-layer-btn0')[0].click()
模拟登陆
这里登陆填账号密码不再赘述,重点讨论验证码识别。
网站的验证码文件是html格式,并且是随机的,网页每打开一次就会刷新一次,例如:2578
我的思路是:网页截取验证码所在区域的图片,保存到本地,然后进行文字OCR识别,
我们不研究识别的原理,现在有很多开源的库可以供我们使用,选择了ddddocr,除了有广告,基本可以使用:
def validate(url):
    """Run OCR on a saved image file and return the recognition result.

    Args:
        url: Path of the image file; despite the name it is passed straight
            to ``open`` in binary mode.

    Returns:
        The value returned by ``ddddocr.DdddOcr().classification`` for the
        file's bytes.
    """
    # A new recognizer is built on every call, before the file is opened.
    recognizer = ddddocr.DdddOcr()
    with open(url, 'rb') as image_file:
        raw_bytes = image_file.read()
    return recognizer.classification(raw_bytes)
def snipScreent(url):
    """Open ``url`` and save a screenshot of the captcha image element.

    The browser window is resized to the document's scroll width/height
    before the element is captured, and the file is named after the current
    Unix timestamp (whole seconds).

    Args:
        url: Page address to load in the shared ``driver``.

    Returns:
        The path of the PNG file that was written.

    Raises:
        OSError: If the element screenshot could not be written.
    """
    import os  # local import so the block does not depend on file-level imports

    driver.get(url)
    time.sleep(2)
    width = driver.execute_script("return document.documentElement.scrollWidth")
    height = driver.execute_script("return document.documentElement.scrollHeight")
    driver.set_window_size(width, height)
    # By.XPATH instead of the bare 'xpath' string, matching the rest of the file.
    r_node = driver.find_element(by=By.XPATH, value='/html/body/div[3]/div/div[2]/dl/dd[3]/img')
    print('网页模块尺寸:height={},width={}'.format(r_node.size['height'], r_node.size['width']))
    times = int(time.time())
    pngPath = r'D:\image\%s.png' % times
    # Make sure the target folder exists; otherwise the screenshot silently
    # fails and the caller only notices when it tries to open the file.
    folder = os.path.dirname(pngPath)
    if folder:
        os.makedirs(folder, exist_ok=True)
    # WebElement.screenshot reports an I/O failure by returning False.
    if not r_node.screenshot(pngPath):
        raise OSError("could not save captcha screenshot to %s" % pngPath)
    return pngPath
# Login sequence: capture the captcha, fill in the form, OCR the captcha,
# submit the form, then idle.
picPath = snipScreent(url=ddwURL)

username_input = driver.find_element(
    By.CSS_SELECTOR,
    "body > div.top > div > div.index_member.no_login > dl > dd.i.i_username > input",
)
username_input.send_keys("bgone")

password_input = driver.find_element(
    By.CSS_SELECTOR,
    "body > div.top > div > div.index_member.no_login > dl > dd.i.i_password > input",
)
password_input.send_keys("123456")

# OCR runs on the screenshot taken above, after the credentials are typed.
num = validate(url=picPath)
captcha_input = driver.find_element(
    By.CSS_SELECTOR,
    "body > div.top > div > div.index_member.no_login > dl > dd.i.i_code > input",
)
captcha_input.send_keys(num)

time.sleep(3)
submit_link = driver.find_element(
    By.CSS_SELECTOR,
    "body > div.top > div > div.index_member.no_login > dl > dd.a > a.submit",
)
submit_link.click()
randomIdle()
表格提交
投注本身也不难,但我们需要写一个算法以尽量维持程序运行:
流程图(Created with Raphaël 2.3.0):
Start
→ 初始投入100
→ 等待结果
→ 本局盈利?
  - yes → End
  - no → 总共豆豆数 - 初始投入 * 2 > 0?
    - yes → 初始投入 * 2,然后回到"等待结果"
    - no → End
def throw():
    """Click the ``#revoke_<id> > a`` link for the id shown in the table.

    The id is the ``textContent`` of the first cell in the sixth row of the
    left-hand table. Any exception is printed, followed by ``randomIdle()``
    and a page refresh, and the whole attempt is repeated until one pass
    completes without raising.
    """
    id_cell_selector = r"body > div.fun_main > div.fun_left > div.left_table > table > tbody > tr:nth-child(6) > td:nth-child(1)"
    while True:
        try:
            id_cell = driver.find_element(by=By.CSS_SELECTOR, value=id_cell_selector)
            t = id_cell.get_attribute("textContent")
            revoke_selector = "#revoke_%s > a" % t
            driver.find_element(by=By.CSS_SELECTOR, value=revoke_selector).click()
        except Exception as ep:
            # Best-effort retry: report, idle, reload, try again.
            print(ep)
            randomIdle()
            driver.refresh()
        else:
            return
def bet(input=0):
    """Type the same stake into five input boxes and click the submit link.

    Args:
        input: Stake typed into each of the five ``#tbLuck28Value*`` boxes
            (1, 3, 5, 7 and 9).

    Returns:
        ``int(getCoins())`` as read before typing, minus five times ``input``.
    """
    # The balance is read first, before anything is typed.
    a = getCoins()
    stake_boxes = (
        "#tbLuck28Value1",
        "#tbLuck28Value3",
        "#tbLuck28Value5",
        "#tbLuck28Value7",
        "#tbLuck28Value9",
    )
    for box_selector in stake_boxes:
        driver.find_element(by=By.CSS_SELECTOR, value=box_selector).send_keys(input)
    submit = driver.find_element(by=By.XPATH, value="/html/body/div[3]/div[1]/div[5]/div/div[3]/div[2]/div/div[3]/a")
    submit.click()
    return int(a) - 5 * input
def getTimer():
    """Report whether both countdown labels show their "closed" texts.

    Returns:
        True only when the ``#bettingOverTime`` text contains "已停止参与"
        and the ``#bettingLottTime`` text contains "解谜中,请稍后";
        False otherwise.
    """
    # Both elements are always read, in this order, before any check.
    lottery_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingLottTime").get_attribute("textContent")
    betting_text = driver.find_element(by=By.CSS_SELECTOR, value="#bettingOverTime").get_attribute("textContent")
    return "已停止参与" in betting_text and "解谜中,请稍后" in lottery_text
# Main betting loop.
# NOTE(review): nesting was reconstructed from a flattened listing — confirm
# against the original script. In the visible code `p` is never set to True
# and `mp` is never read, so the outer loop does not terminate on its own.
p = False
while not p:
    # Every outer pass starts again from the base stake.
    initialC = 20
    mp = False
    for i in range(1, maxloop):
        driver.get(url=guessURL90)
        throw()
        # `a` is what bet() returns: coins before betting minus 5 * stake.
        a = bet(input=initialC)

        # Poll every 2 seconds until getTimer() reports True.
        while True:
            timerAssert = getTimer()
            if timerAssert:
                break
            time.sleep(2)

        time.sleep(5)
        driver.refresh()
        c = collection()
        print(c)

        if c > 0:
            # Positive result: mark it and end this series.
            mp = True
            break
        if c < 0:
            # Negative result: double the stake, but stop once five stakes
            # would exceed a + c.
            initialC *= 2
            if initialC * 5 > a + c:
                break
            time.sleep(5)
数据库操作
先在本地搭建MySQL服务器,用Navicat Premium 15建一个表ddw,字段为Number、DateTime、Result、Coins、Hits、In、Out,主键在Number上;然后用脚本获取数据并存入:
class Sql():
    """Small PyMySQL helper for reading and writing the ``ddw`` table."""

    def __init__(self, host="localhost", username="root", password=""):
        """Store the connection settings used by ``connectMysql``.

        The original constructor was misspelled ``__int__`` and therefore
        never ran, while ``connectMysql`` hard-coded ``localhost``/``root``/
        empty password. Those effective values are kept as the defaults here.

        Args:
            host: MySQL server host name or address.
            username: MySQL user name.
            password: MySQL password.
        """
        self.host = host
        self.username = username
        self.password = password

    def connectMysql(self):
        """Open and return a new connection to the ``ddw`` database.

        Rows are returned as dictionaries (``DictCursor``). The caller is
        responsible for closing the connection.
        """
        connection = pymysql.connect(host=self.host,
                                     user=self.username,
                                     password=self.password,
                                     db='ddw',
                                     charset='utf8mb4',
                                     cursorclass=pymysql.cursors.DictCursor)
        return connection

    def instertMysql(self, num, date, result, coin, hits, inn, out):
        """Insert one row into ``ddw.ddw``, commit, then read it back and print it.

        Args:
            num: Value for the ``Number`` column.
            date: Value for the ``DateTime`` column.
            result: Value for the ``Result`` column.
            coin: Value for the ``Coins`` column.
            hits: Value for the ``Hits`` column.
            inn: Value for the ``In`` column.
            out: Value for the ``Out`` column.
        """
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                sql = "INSERT INTO `ddw`.`ddw` (`Number`, `DateTime`, `Result`, `Coins`, `Hits`, `In`, `Out`) VALUES (%s, %s, %s, %s, %s, %s, %s)"
                cursor.execute(sql, (num, date, result, coin, hits, inn, out))
            connection.commit()
            with connection.cursor() as cursor:
                sql = "SELECT Number FROM ddw.ddw WHERE Number=%s"
                # One-element tuple; the original ``(num)`` was just ``num``.
                cursor.execute(sql, (num,))
                # Separate name so the ``result`` argument is not shadowed.
                stored = cursor.fetchone()
                print(stored)
        finally:
            connection.close()

    def selectMysql(self, index=0):
        """Fetch the row with the highest ``Number`` and return it.

        Args:
            index: Value used for ``LIMIT``; a falsy value means 1.

        Returns:
            The first row of the result as a dict, or None when the table is
            empty.
        """
        # The original used an undefined ``connection`` here.
        connection = self.connectMysql()
        try:
            with connection.cursor() as cursor:
                # Backticks: ORDER BY 'Number' sorts by a constant string, not
                # by the column. PyMySQL only understands %s placeholders, so
                # the original "LIMIT %d" raised during query formatting.
                sql = "select * from ddw ORDER BY `Number` DESC LIMIT %s"
                limit = int(index) if index else 1
                cursor.execute(sql, (limit,))
                # NOTE(review): only the first row is returned even when
                # index > 1, as in the original — confirm whether fetchall()
                # was intended.
                result = cursor.fetchone()
                print(result)
                return result
        finally:
            connection.close()
def collection():
    """Scrape the result table, store unseen rows in MySQL and return In - Out.

    Every ``td`` under the results table is read and its ``innerHTML`` is
    parsed into a list of text nodes. The cells are then consumed seven at a
    time: Number, DateTime, Result, Coins, Hits, then one cell holding both
    the In and Out figures (the seventh cell is not read). A row is inserted
    only when its ``Number`` is not in ``ddw.ddw`` yet.

    Returns:
        ``In - Out`` of the last row processed.
        NOTE(review): nesting was reconstructed from a flattened listing;
        confirm that the last processed row is the round the caller expects.

    Raises:
        ValueError: If no result cells are left after slicing (the original
            failed with UnboundLocalError at the return statement).
    """
    year = datetime.datetime.now().year
    sql = Sql()
    connection = sql.connectMysql()
    # try/finally so the connection is closed even when scraping or parsing
    # raises; the original only closed it on the success path.
    try:
        content = driver.find_elements(by=By.XPATH, value="/html/body/div[3]/div[1]/div[5]/table/tbody/tr/td")
        alist = []
        for cell in content:
            html = etree.fromstring(cell.get_attribute("innerHTML"), parser=etree.HTMLParser())
            try:
                alist.append(html.xpath("//text()"))
            except Exception:
                # Fall back to the raw markup when text nodes cannot be extracted.
                alist.append([cell.get_attribute("innerHTML")])
        # Skip the first 36 cells and the last one — presumably non-result
        # cells; TODO confirm against the page layout.
        usefulContent = alist[36:-1]
        if not usefulContent:
            raise ValueError("no result rows found in the results table")
        for index in range(0, len(usefulContent), 7):
            Number = int(usefulContent[index][0])
            # The page shows no year, so the current one is prepended.
            DateTime = str(year) + "-" + usefulContent[index+1][0]
            Result = int(usefulContent[index+2][0])
            # Thousands separators are stripped; Coins stays a string.
            Coins = "".join(usefulContent[index+3][0].split(","))
            Hits = int("".join(usefulContent[index+4][0].split(",")))
            # In and Out are two text nodes of the same cell; the number is
            # whatever follows the last ":" in each.
            In = int("".join(usefulContent[index+5][0].split(":")[-1].split(",")))
            Out = int("".join(usefulContent[index+5][1].split(":")[-1].split(",")))
            # The with-block closes the cursor; no explicit close() needed.
            with connection.cursor() as cursor:
                try:
                    existOne = "SELECT Number FROM ddw.ddw WHERE Number=%s"
                    # One-element tuple; the original ``(Number)`` was just ``Number``.
                    cursor.execute(existOne, (Number,))
                    result = cursor.fetchone()
                    print(result)
                    if not result:
                        sql.instertMysql(Number, DateTime, Result, Coins, Hits, In, Out)
                except Exception as ep:
                    # Best effort: a failed lookup/insert must not stop the scrape.
                    print(ep)
        return In - Out
    finally:
        connection.close()
结果展示:
总结
难度系数低,可以获取数据用于日后数据分析。