from pyhanlp import *
import re
import difflib
from py2neo.matching import NodeMatcher
import pymongo
import re
#连接neo4j 建立关系
from py2neo import Graph,Node,Relationship
# Sentence templates. Each template lists POS tags in order; a "*", "**" or
# "***" marker sits right after the tag whose word fills the first, second or
# third element of the extracted triple.
base_struc = [
    ["/n", "*", "/v", "**", "/n", "***"],
    ["/n", "*", "/p", "**", "/s", "***"],
    ["n", "*", "v", "**", "", "***"],
]

# Handle to the Neo4j database used for every write in this script.
graph = Graph("bolt://localhost:7687", auth=("neo4j", "123456"))

# Remembered sentences paired with the triple that was stored for them.
spl_list = [("叶革质", ["叶子", "有", "皮革质感"])]

# State carried over from the previously processed sentence: its triple,
# its words, its tags and the index of the template it matched.
bef_list, bef_str_splo, bef_splo = [], [], []
bef_best_sol_num = 0
def jianli_relation(str_1, str_2, str_3):
    """Write one triple to the graph.

    Builds a node named ``str_1`` and a node named ``str_3``, creates both in
    the module-level ``graph``, then creates a relationship of type ``str_2``
    pointing from the first node to the second. No lookup for already
    existing nodes is done here.
    """
    # NOTE(review): ``label='rl'`` is given as a keyword, like ``name`` --
    # presumably py2neo stores it as a property rather than a node label;
    # confirm against the py2neo ``Node`` documentation.
    head, tail = (Node(label='rl', name=text) for text in (str_1, str_3))
    for endpoint in (head, tail):
        graph.create(endpoint)
    graph.create(Relationship(head, str_2, tail))
# Interactive loop header. NOTE(review): indentation is missing in this file;
# the loop body presumably runs to the end of the script (a `break` appears
# further down) -- restore the nesting before running.
while True:
# Read one sentence from the user.
input_S=input("输入处理文字")
# Keep the HanLP segmentation as text; the code below extracts tags and
# words from this text with regular expressions.
s = str(HanLP.segment(input_S))
# `s` is already a str, so this is a plain second name for the same text.
string = str(s)
print(s)
def subString(string):
    """Split segmented text into its POS tags and its Chinese words.

    Returns a ``(tags, words)`` pair: ``tags`` holds every "/" followed by
    lowercase ASCII letters, ``words`` holds every run of characters in the
    range U+4E00..U+9FA5, both in order of appearance.

    The two lists are extracted independently of each other, so a token
    without Chinese characters (a digit, punctuation) contributes a tag but
    no word and the lists may differ in length.
    """
    tag_pattern = re.compile(r'/[a-z]+')
    word_pattern = re.compile(r'[\u4e00-\u9fa5]+')
    return tag_pattern.findall(string), word_pattern.findall(string)
# Tags and words of the current sentence, plus pristine copies taken before
# anything gets a chance to mutate the working lists.
smil_list = []
slotList, ss = subString(string)
sc = list(ss)
slotList_c = list(slotList)
print(ss)
print(slotList)

# The whole tag sequence as one string, ready for fuzzy comparison.
str_slotList = "".join(str(tag) for tag in slotList)

# Score each template: strip its slot markers, glue the remaining tags
# together and measure the similarity to the sentence's tag string.
slot_markers = ('*', '**', '***', '(*)', '(**)', '(***)')
for template in base_struc:
    str_base = "".join(
        str(part) for part in template if str(part) not in slot_markers
    )
    r = difflib.SequenceMatcher(None, str_slotList, str_base).ratio()
    smil_list.append(r)
best_project = smil_list.index(max(smil_list))
full_element = 0
print(smil_list)

# The winning template with every slot marker filtered out.
chosen = base_struc[best_project]
base_struc_tar = [part for part in chosen if part not in slot_markers]

# Only the starred elements are examined: for each plain marker present in
# the winning template, remember the position of the tag right before it.
bool_re_first = '*' in chosen
bool_re_sec = '**' in chosen
bool_re_thir = '***' in chosen
if bool_re_first:
    re_first = chosen.index('*') - 1
if bool_re_sec:
    re_sec = chosen.index('**') - 1
if bool_re_thir:
    re_third = chosen.index('***') - 1

# Every starred tag must be available in the sentence; each hit is consumed
# from a scratch copy so that one tag cannot satisfy two slots.
slotList_copy = list(slotList)
all_right = True
if bool_re_first:
    wanted = chosen[re_first]
    if wanted in slotList_copy:
        slotList_copy.remove(wanted)
    else:
        all_right = False
if bool_re_sec:
    wanted = chosen[re_sec]
    if wanted in slotList_copy:
        full_element += 1
        slotList_copy.remove(wanted)
    else:
        all_right = False
if bool_re_thir:
    wanted = chosen[re_third]
    if wanted in slotList_copy:
        full_element += 1
        slotList_copy.remove(wanted)
    else:
        all_right = False
# Only the starred elements are matched.
def fous(index_base_Struc, list_str, list_num):
    """Collect the three triple elements for the current sentence.

    The template ``base_struc[index_base_Struc]`` decides, slot by slot
    (markers "*", "**", "***"), where each word comes from:

    * if the template contains the parenthesised marker ("(*)", "(**)",
      "(***)"), the word is taken from the previous sentence: the tag that
      precedes the plain marker in ``base_struc[bef_best_sol_num]`` is
      looked up in ``bef_splo`` and the word at that position of
      ``bef_str_splo`` is used;
    * otherwise the tag at ``re_first`` / ``re_sec`` / ``re_third`` of the
      template is looked up in ``list_num`` and the word at the same
      position of ``list_str`` is used; both entries are then removed.

    Args:
        index_base_Struc: index of the matched template in ``base_struc``.
        list_str: words of the current sentence; consumed in place.
        list_num: POS tags parallel to ``list_str``; consumed in place.

    Returns:
        A list with the first, second and third element of the triple.

    Raises:
        ValueError: a required tag or marker is not present.
        IndexError: ``list_str`` is shorter than the tag position found.
    """

    def take_current(tag_position):
        # First remaining occurrence of the slot's tag in this sentence.
        tag = base_struc[index_base_Struc][tag_position]
        position = list_num.index(tag)
        list_num.pop(position)
        # Pop by position, not by value: with a repeated word, remove()
        # would delete the first equal word instead of the one aligned
        # with the tag, and the two lists would drift apart.
        return list_str.pop(position)

    def take_previous(marker):
        # Tag in front of the plain marker in the previously matched template.
        previous_template = base_struc[bef_best_sol_num]
        tag = previous_template[previous_template.index(marker) - 1]
        return bef_str_splo[bef_splo.index(tag)]

    # The template named by the argument drives both the marker checks and
    # the tag lookups.
    template = base_struc[index_base_Struc]
    thir_list = []
    # re_first / re_sec / re_third are read only when the slot is filled
    # from the current sentence; they may be unset otherwise.
    thir_list.append(
        take_previous('*') if '(*)' in template else take_current(re_first)
    )
    thir_list.append(
        take_previous('**') if '(**)' in template else take_current(re_sec)
    )
    thir_list.append(
        take_previous('***') if '(***)' in template else take_current(re_third)
    )
    return thir_list
# Decide how to obtain the triple for this sentence: automatic extraction,
# a remembered sentence, or interactive entry.
# NOTE(review): indentation is missing in this file, so the nesting of the
# statements below has to be restored by hand; comments here describe the
# statements, not a confirmed block structure.

# Similarity of the segmented text to every remembered sentence in spl_list.
index_sp_max = []
for i in range(len(spl_list)):
r = difflib.SequenceMatcher(None, string, spl_list[i][0]).ratio()
index_sp_max.append(r)
thir_list = []
# Best template similarity above 0.8 and every starred tag present:
# extract the triple automatically.
if max(smil_list) > 0.8 and all_right==True:
print("good")
# fous consumes ss and slotList; sc / slotList_c still hold the originals.
thir_list = fous(best_project, ss, slotList)
print(thir_list)
# Remember this sentence for the next one.
bef_list = list(thir_list)
bef_best_sol_num = best_project
bef_str_splo = list(sc)
bef_splo = list(slotList_c)
jianli_relation(thir_list[0], thir_list[1], thir_list[2])
# Only the starred elements are inserted manually.
# Otherwise, a remembered sentence with similarity above 0.9: reuse its triple.
elif max(index_sp_max) > 0.9:
thir_list = list(spl_list[index_sp_max.index(max(index_sp_max))][1])
jianli_relation(thir_list[0], thir_list[1], thir_list[2])
bef_list = list(thir_list)
# Neither worked: ask the user.
else:
print("bad")
print(ss, slotList)
print("输入关系")
qui = input("是否放弃 Y/N")
# "Y" leaves the enclosing loop.
if qui == "Y":
break
else:
can_ = input("进行残缺句处理还是碎句处理 C/S or other")
# "C": the sentence is completed with words from the previous sentence.
if can_ == "C":
print("这里是残缺句处理")
print("上次句子:" + str(bef_str_splo))
print("上次结构" + str(bef_splo))
print("上次最佳" + str(base_struc[bef_best_sol_num]))
print("本次句子" + str(str_slotList))
print("本次结构" + str(slotList))
# Collect triple elements typed by the user until "q" is entered.
while True:
inp = input("nre")
if inp == "q":
break
thir_list.append(inp)
# NOTE(review): thir_list[0..2] are read below without checking that three
# elements were entered; ss.index raises ValueError for a word not in ss.
# ii is added to the insert position of the plain markers.
ii=0
# A word found in the previous sentence gets the parenthesised marker
# appended to the tag list; any other word gets the plain marker inserted
# at its index in ss plus one (plus ii).
if thir_list[0] in bef_str_splo:
slotList.append('(*)')
else:
add_fir = ss.index(thir_list[0]) + 1
slotList.insert(add_fir+ii, '*')
# NOTE(review): whether this increment belongs to the else branch cannot be
# told from the unindented text; same for the two increments below.
ii=ii+1
if thir_list[1] in bef_str_splo:
slotList.append('(**)')
else:
add_sec = ss.index(thir_list[1]) + 1
slotList.insert(add_sec+ii, '**')
ii=ii+1
if thir_list[2] in bef_str_splo:
slotList.append('(***)')
else:
add_third = ss.index(thir_list[2]) + 1
slotList.insert(add_third+ii, '***')
ii=ii+1
# The marked-up tag list becomes a new template.
base_struc.append(slotList)
jianli_relation(thir_list[0], thir_list[1], thir_list[2])
# "S": fragment handling via the remembered sentences.
elif can_=="S":
# Same similarity scan as at the top of this section.
index_sp_max = []
for i in range(len(spl_list)):
r = difflib.SequenceMatcher(None, string, spl_list[i][0]).ratio()
index_sp_max.append(r)
if max(index_sp_max) > 0.9:
thir_list=list(spl_list[index_sp_max.index(max(index_sp_max))][1])
jianli_relation(thir_list[0], thir_list[1], thir_list[2])
bef_list = list(thir_list)
else:
print("目前元素小于3个,且无相关记忆,是否放弃该句子 Y/N")
panduan = input("输入你的判断")
if panduan == "Y":
print("wow")
else:
print(str_slotList)
print(bef_list)
# The user types the three elements directly.
thir_list.append(input("输入三元组第一个元素"))
thir_list.append(input("输入三元组的第二个元素"))
thir_list.append(input("输入三元组的第三个元素"))
jianli_relation(thir_list[0], thir_list[1], thir_list[2])
# bef_list and the tuple stored in spl_list share this one list object.
bef_list = thir_list
# Remember the segmented text together with its triple.
spl_list.append((string, thir_list))
# Any other answer: the user names the three words and a new template is
# built from the current tag list.
else:
while True:
inp = input("nre")
if inp == "q":
break
thir_list.append(inp)
add_fir = ss.index(thir_list[0]) + 1
add_sec = ss.index(thir_list[1]) + 1
add_third = ss.index(thir_list[2]) + 1
# The +1 / +2 offsets presumably compensate for the markers inserted just
# before -- that only holds if the three words appear in this order in ss;
# TODO confirm.
slotList.insert(add_fir, '*')
slotList.insert(add_sec + 1, '**')
slotList.insert(add_third + 2, '***')
base_struc.append(slotList)
jianli_relation(thir_list[0], thir_list[1], thir_list[2])
# Remember this sentence for the next one.
# NOTE(review): the intended nesting level of these four assignments is not
# recoverable from the unindented text.
bef_list = list(thir_list)
bef_best_sol_num = best_project
bef_str_splo = list(sc)
bef_splo = list(slotList_c)
|