soup.select('div') 所有名为<div>的元素
soup.select('#author') 带有id 属性为author 的元素
soup.select('.notice') 所有使用CSS class 属性名为notice 的元素
soup.select('div span') 所有在<div>元素之内的<span>元素
soup.select('div > span') 所有直接在<div>元素之内的<span>元素,中间没有其他元素
soup.select('input[name]') 所有名为<input>,并有一个name 属性,其值无所谓的元素
soup.select('input[type="button"]') 所有名为<input>,并有一个type 属性,其值为button 的元素
requests模拟登录:
import requests,json
# Log in by POSTing the credentials through a session object, so that any
# cookies the server sets are kept for later requests made through `s`.
login_url = '要登陆的url'
s = requests.session()
data = {'email': '989898', 'password': 'admin'}
hers = {
    'content-type': 'application/x-www-form-urlencoded',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}
# Pass the dict itself via `data=`: requests form-encodes it, which matches
# the declared content-type. Sending json.dumps(data) under this header would
# hand the server a JSON string where it expects form fields.
res = s.post(login_url, data=data, headers=hers)
print(res.text)
isalpha()返回True,如果字符串只包含字母,并且非空;
isalnum()返回True,如果字符串只包含字母和数字,并且非空;
isdecimal()返回True,如果字符串只包含数字字符,并且非空;
isspace()返回True,如果字符串只包含空格、制表符和换行,并且非空;
istitle()返回True,如果字符串仅包含以大写字母开头、后面都是小写字母的单词,并且非空;
isupper()字符串是否全为大写 并且非空
islower()字符串是否全为小写 并且非空
连接数据库:
import pymysql.cursors
# Connect to the local MySQL server and insert one row into the table.
connection = pymysql.connect(
    host='localhost',
    user='root',
    password='123456',
    db='mydb',
    charset='utf8mb4',
)
try:
    with connection.cursor() as cursor:
        # %s placeholders let the driver escape the values; never build the
        # statement by string formatting.
        sql = "insert into `表名` (`name`,`sex`) values(%s,%s)"
        cursor.execute(sql, ("用变量循环写入的内容1", "用变量写入的内容2"))
    # The insert is only persisted once the transaction is committed.
    connection.commit()
finally:
    # Release the connection whether or not the insert succeeded.
    connection.close()
拆分、合并pdf:
from PyPDF2 import PdfFileReader, PdfFileWriter
def split_pdf(infn, outfn, start_page=5):
    """Copy the pages of one PDF, from ``start_page`` onwards, into a new PDF.

    Prints the page count of the input file as a side effect.

    Args:
        infn: Path of the PDF to read.
        outfn: Path of the PDF to write.
        start_page: Zero-based index of the first page to keep. The default
            of 5 drops the first five pages.
    """
    pdf_output = PdfFileWriter()
    # Keep the input open until write() has finished: the writer still needs
    # the source stream when it serialises the copied pages.
    with open(infn, 'rb') as src:
        pdf_input = PdfFileReader(src)
        page_count = pdf_input.getNumPages()
        print(page_count)
        for i in range(start_page, page_count):
            pdf_output.addPage(pdf_input.getPage(i))
        with open(outfn, 'wb') as dst:
            pdf_output.write(dst)
def merge_pdf(infnList, outfn):
    """Concatenate all pages of several PDFs, in order, into one PDF.

    Prints the page count of each input file as a side effect.

    Args:
        infnList: Iterable of paths of the PDFs to read, in output order.
        outfn: Path of the merged PDF to write.
    """
    from contextlib import ExitStack

    pdf_output = PdfFileWriter()
    # Every input must stay open until write() has finished, so collect the
    # handles in an ExitStack and close them all together afterwards.
    with ExitStack() as stack:
        for infn in infnList:
            src = stack.enter_context(open(infn, 'rb'))
            pdf_input = PdfFileReader(src)
            page_count = pdf_input.getNumPages()
            print(page_count)
            for i in range(page_count):
                pdf_output.addPage(pdf_input.getPage(i))
        with open(outfn, 'wb') as dst:
            pdf_output.write(dst)
if __name__ == '__main__':
    # Example run: write c:\12.pdf from c:\11.pdf using split_pdf.
    infn = 'c:\\11.pdf'
    outfn = 'c:\\12.pdf'
    split_pdf(infn, outfn)
----------------------------------pdf转word----------打印出来文字---------------------
import pdfminer
from pdfminer.converter import PDFPageAggregator
from pdfminer.layout import LAParams
from pdfminer.pdfparser import PDFParser, PDFDocument
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.pdfdevice import PDFDevice
# Print the text of every layout object on every page of the PDF.
rsource = PDFResourceManager()
laparam = LAParams()
divce = PDFPageAggregator(rsource, laparams=laparam)
interpreter = PDFPageInterpreter(rsource, divce)
# The file stays open for the whole loop because pages are read from it
# while iterating; the `with` block closes it afterwards.
with open('c:\\培训脚本-20160810.pdf', 'rb') as fp:
    parser = PDFParser(fp)
    doc = PDFDocument()
    # Parser and document have to be linked in both directions.
    parser.set_document(doc)
    doc.set_parser(parser)
    # Empty string = no password.
    doc.initialize("")
    for page in doc.get_pages():
        interpreter.process_page(page)
        layout = divce.get_result()
        for out in layout:
            # Only some layout objects carry text; skip the rest.
            if hasattr(out, "get_text"):
                out_utf8 = out.get_text()
                print(out_utf8)
-----------------------------pdf不转换格式,处理后仍为pdf-----------------------------------------
import PyPDF2,docx
# Copy a single page of c:\11.pdf into a new one-page PDF, c:\12.pdf.
with open('c:\\11.pdf', 'rb') as pdf_file:
    pdfread = PyPDF2.PdfFileReader(pdf_file)
    pdfpages = pdfread.getNumPages()
    print(pdfpages)
    # Page indexes are zero-based, so 11 is the twelfth page; the input must
    # have at least twelve pages.
    page = pdfread.getPage(11)
    pdf_output = PyPDF2.PdfFileWriter()
    pdf_output.addPage(page)
    # Write while the input is still open, then let both handles close.
    with open('c:\\12.pdf', 'wb') as outs:
        pdf_output.write(outs)
------------------------subprocess子进程控制-----------------------
import subprocess
# Equivalent of the shell pipeline `netstat -ano | findstr 80`: print every
# netstat line that contains "80".
show_port = subprocess.Popen(['netstat', '-ano'], stdout=subprocess.PIPE)
show_port2 = subprocess.Popen(['findstr', '80'], stdin=show_port.stdout, stdout=subprocess.PIPE)
# Drop the parent's copy of the pipe so that netstat is not left blocked on
# a full pipe if findstr exits first.
show_port.stdout.close()
outs, _ = show_port2.communicate()
show_port.wait()
# Decode the captured bytes and split on real line endings, instead of
# splitting the repr of the (stdout, stderr) tuple on a literal "\r\n".
for i in outs.decode(errors='replace').splitlines():
    print(i)
----------python跳过ssl不信任证书报错-----------------------
import ssl

# WARNING: this turns off HTTPS certificate verification for every default
# context created afterwards in this process. Use it only against hosts you
# trust (e.g. a test server with a self-signed certificate), never in
# production code.
ssl._create_default_https_context = ssl._create_unverified_context
-----------------pyautogui操作键盘和鼠标----------------------------------------
import pyautogui,time
|