import docx
from docx import Document #导入库
import re
# Matches one bracketed annotation such as "(32)", "{...}", "[...]" or "<...>".
_BRACKETED_RE = re.compile(r"\(.*?\)|\{.*?\}|\[.*?\]|<.*?>")

# Type names found in the document (lower-cased, stripped) -> Java field type.
_JAVA_TYPE_BY_DOC_TYPE = {
    'number': 'Integer',
    'decimal': 'BigDecimal',
    'datetime': 'String',
    'date': 'String',
    'string': 'String',
}

# Header texts that mark the field-key column and the data-type column.
_KEY_HEADERS = ('参数', '变量名')
_TYPE_HEADERS = ('数据类型', '类型')


def _header_titles(header_row):
    """Return the distinct cell texts of the header row, in first-seen order."""
    titles = []
    for cell in header_row.cells:
        if cell.text not in titles:
            titles.append(cell.text)
    return titles


def _row_values(row):
    """Return a data row's cell texts without repeated values or leading blanks."""
    values = []
    for cell in row.cells:
        text = cell.text
        if text == '':
            # Blank cells are kept only after at least one value was collected.
            if values:
                values.append(text)
        elif text not in values:
            # NOTE(review): presumably repeats come from merged cells; this also
            # drops a genuine second cell with the same text - confirm.
            values.append(text)
    return values


def _java_type(doc_type):
    """Map the text of a type cell to the Java type of the generated field."""
    # Strip bracketed annotations first so that e.g. "number(10)" is still
    # recognised as "number".
    bare = _BRACKETED_RE.sub("", doc_type)
    java_type = _JAVA_TYPE_BY_DOC_TYPE.get(bare.lower().strip(), bare)
    # Text that parses as a number cannot be a Java type name; use String.
    return 'String' if is_number(java_type) else java_type


def getJavaBeanFromTable(document):
    """Write Java bean field declarations for every table in *document*.

    The first row of each table is treated as the header. The column whose
    header is '参数' or '变量名' supplies the JSON property name (default:
    column 0); the column whose header is '数据类型' or '类型' supplies the
    type (default: column 1). For every following row a
    ``@JsonProperty("<key>")`` line and a ``private <type> <camelKey>;`` line
    are produced, preceded per table by a ``table:<index> <row count>行``
    heading. Rows with fewer values than the header has titles, or too short
    to contain both columns, are skipped.

    Args:
        document: object exposing ``tables``; each table exposes ``rows``,
            each row ``cells`` and each cell ``text`` (a python-docx
            ``Document`` as used by this script).

    Returns:
        None. The generated text is written, UTF-8 encoded, to the file
        ``outPutFile`` in the current working directory, replacing any
        previous content.
    """
    chunks = []
    for table_no, table in enumerate(document.tables):
        rows = table.rows
        chunks.append('\n')
        chunks.append('table:' + str(table_no) + ' ' + str(len(rows) - 1) + '行\n')

        # Column positions are re-detected for every table.
        key_index = 0
        type_index = 1
        titles = []
        for row_no, row in enumerate(rows):
            if row_no == 0:
                titles = _header_titles(row)
                for col, title in enumerate(titles):
                    if title in _KEY_HEADERS:
                        key_index = col
                    elif title in _TYPE_HEADERS:
                        type_index = col
                continue

            values = _row_values(row)
            # Skip rows that cannot be indexed safely instead of raising
            # IndexError on short or irregular tables.
            if len(values) < len(titles) or max(key_index, type_index) >= len(values):
                continue

            field_key = values[key_index]
            chunks.append('\n')
            chunks.append('@JsonProperty("' + field_key + '")\n')
            chunks.append(
                'private ' + _java_type(values[type_index]) + ' '
                + convert(field_key, '_') + ';\n'
            )

    with open("outPutFile", 'w', encoding='UTF-8') as f:
        f.write(''.join(chunks))
def is_number(s):
    """Return True if *s* can be interpreted as a number, otherwise False.

    A value counts as a number when ``float(s)`` accepts it (this includes
    strings such as "1e3", "inf" and "nan"), or when it is a single Unicode
    character with a numeric value according to ``unicodedata.numeric``
    (for example "四" or "½").

    Args:
        s: the value to test, normally a string.

    Returns:
        bool: True for numeric input; False for anything else, including
        ``None`` and other values that ``float`` rejects with TypeError.
    """
    import unicodedata  # kept local: not part of the file's top-level imports

    try:
        float(s)
    except (TypeError, ValueError):
        # TypeError covers non-string, non-numeric input such as None.
        pass
    else:
        return True

    try:
        unicodedata.numeric(s)
    except (TypeError, ValueError):
        # TypeError: not a single character; ValueError: no numeric value.
        return False
    return True
def getNonRepeatList2(data):
    """Return the items of *data* with duplicates removed, keeping first-seen order.

    Items are compared with ``==`` through list membership, so unhashable
    items such as lists are supported.

    Args:
        data: any iterable (list, tuple, string, iterator, ...).

    Returns:
        list: a new list holding the first occurrence of each distinct item.
    """
    unique_items = []
    # Iterate directly instead of indexing so iterators and generators work too.
    for item in data:
        if item not in unique_items:
            unique_items.append(item)
    return unique_items
def convert(one_string, space_character):
    """Turn a delimiter-separated name into lower camel case.

    Args:
        one_string: the input string.
        space_character: the separator that marks the word boundaries.

    Returns:
        *one_string* unchanged when it does not contain the separator;
        otherwise the first segment lower-cased followed by every other
        segment capitalized (``str.capitalize``), joined without a separator.
    """
    if space_character in one_string:
        # Split into the leading segment and the remaining ones.
        head, *tail = str(one_string).split(space_character)
        return head.lower() + ''.join(segment.capitalize() for segment in tail)
    return one_string
if __name__ == "__main__":
    # Read the API document from its fixed path and generate the bean fields.
    getJavaBeanFromTable(Document('../doc/Api7.docx'))
    print("auto convert doc to bean by docx power")
# Install the python-docx dependency before running this script:
#   pip3 install -i https://pypi.douban.com/simple python-docx