中華古詩詞知識圖譜之實體關係構建&匯入neo4j資料庫
阿新 • • 發佈:2022-01-02
實體分析
詩名實體
屬性
包含:作詩時間,詩名,內容,翻譯,背景。
關係
實體1 | 關係 | 實體2 |
詩名 | 形式 | 詩詞形式 |
詩名 | 作者 | 詩人 |
詩名 | 分類 | 類別 |
詩名 | 詞牌名 | 詞牌名 |
詩名 | 曲牌名 | 曲牌名 |
詩名 | 朝代 | 朝代 |
詩人實體
屬性
包含:出生時間,頭像連結,去世時間,詩詞數量,字,號,名字,簡介。
關係
實體1 | 關係 | 實體2 |
詩人 | 好友 | 詩人 |
詩人 | 合稱 | 詩人合稱 |
詩人 | 軌跡 | 地點 |
詩人 | 寫作 | 詩名 |
詩人 | 朝代 | 朝代 |
朝代實體
屬性
包含:朝代名稱(唐宋元明清)
關係
實體1 | 關係 | 實體2 |
朝代 | 包含 | 詩人 |
朝代 | 包含 | 詩名 |
類別實體
屬性
包含:類別名稱(寫景,抒懷……)
關係
實體1 | 關係 | 實體2 |
類別 | 包含 | 詩名 |
詩詞形式實體
屬性
包含:詩詞形式名稱(五言律詩,五言絕句,五言,七言律詩,七言絕句,七言)
關係
實體1 | 關係 | 實體2 |
詩詞形式 | 包含 | 詩名 |
詞牌名實體
屬性
包含:詞牌名名稱
關係
實體1 | 關係 | 實體2 |
詞牌名 | 包含 | 詩名 |
曲牌名實體
屬性
包含:曲牌名名稱
關係
實體1 | 關係 | 實體2 |
曲牌名 | 包含 | 詩名 |
詩人合稱實體
屬性
包含:詩人合稱名稱
關係
實體1 | 關係 | 實體2 |
詩人合稱 | 包含 | 詩人 |
地點實體
屬性
包含:古代地點名稱,經緯度,現今名稱
事件實體
屬性
包含:時間,事件名稱,地點
詩句實體與關鍵字實體
屬性
資料內容,關鍵字內容
關係
實體1 | 關係 | 實體2 |
詩句 | 關鍵字 | 字 |
字 | 詩句 | 詩句 |
實體構建
構建順序
構建的原則:
先構建不易發生多種關係的單一實體,再構建多關係實體。
例如:類別,詩詞形式,詞牌名,曲牌名,朝代,詩人合稱,軌跡,事件
多關係實體:詩人,詩名
類別實體構建
create_tag.py
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_tag():
    """Create one ``tag`` node per entry in the ``tag`` column of the tag workbook.

    Missing cells are replaced with "無" before the nodes are created.
    """
    sheet = pd.read_excel('./data2/tag_name.xlsx').fillna("無")
    for tag_name in list(sheet.tag):
        CreateNode(graph, "tag", {"name": tag_name})
        print("建立詩詞分類:" + tag_name + "成功!!")


if __name__ == '__main__':
    create_tag()
展示
詩詞形式實體構建
create_formal.py
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_formal():
    """Create one ``formal`` node for each of the six fixed poem-form names."""
    form_names = ('七言', '五言', '七言律詩', '七言絕句', '五言律詩', '五言絕句')
    for form_name in form_names:
        CreateNode(graph, "formal", {"name": form_name})
        print("建立詩詞形式:" + form_name + "成功!!")


if __name__ == '__main__':
    create_formal()
展示
詞牌名與曲牌名實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_pai_name():
    """Create ``ci_pai`` nodes and then ``qu_pai`` nodes from their workbooks.

    Names come from the ``title`` column of the ci-pai workbook and the
    ``qu_name`` column of the qu-pai workbook; missing cells become "無".
    """
    sources = (
        ('./data2/cipai_name.xlsx', 'title', "ci_pai", "建立詞牌名"),
        ('./data2/qupai_name.xlsx', 'qu_name', "qu_pai", "建立曲牌名"),
    )
    for path, column, label, prefix in sources:
        sheet = pd.read_excel(path).fillna("無")
        for pai_name in list(getattr(sheet, column)):
            CreateNode(graph, label, {"name": pai_name})
            print(prefix + pai_name + "成功!!")


if __name__ == '__main__':
    create_pai_name()
展示
飛花令關鍵字實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_word():
    """Create one ``word`` node per entry in the ``word`` column of the word workbook.

    Missing cells are replaced with "無" before the nodes are created.
    """
    sheet = pd.read_excel('./data2/word.xlsx').fillna("無")
    for keyword in list(sheet.word):
        CreateNode(graph, "word", {"name": keyword})
        print("建立飛花令:" + keyword + "成功!!")


if __name__ == '__main__':
    create_word()
展示
詩句實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Look the node up by its name property.  Property-equality matching lets
    # py2neo handle quoting, so names that contain quote characters no longer
    # break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
#獲取指定資料夾下的excel
import os
def get_filename(path, filetype):
    """Collect the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the leading dot, e.g. ``'.xlsx'``.  The directory
    tree is walked recursively, but only bare file names (with extension,
    without directory) are returned.
    """
    return [
        entry
        for _root, _dirs, entries in os.walk(path)
        for entry in entries
        if os.path.splitext(entry)[1] == filetype
    ]
def create_sentence():
    """Load the workbooks under ``sentences/`` as ``sentence`` nodes linked to ``word`` nodes.

    At most the first 50000 rows of each workbook are processed.  For every
    row a ``sentence`` node is created, and for every comma-separated entry
    of the ``word`` column a pair of relationships ("關鍵字" and "詩句") is
    requested between the sentence and that word.
    """
    folder = 'sentences/'
    row_limit = 50000
    for workbook in get_filename(folder, '.xlsx'):
        path = folder + workbook
        print(path)
        # Read the workbook; empty cells become "無".
        sheet = pd.read_excel(path).fillna("無")
        lines = list(sheet.sentens)
        poets = list(sheet.author)
        titles = list(sheet.title)
        keyword_cells = list(sheet.word)
        rows = zip(lines[:row_limit], poets, titles, keyword_cells)
        for index, (line, poet, poem_title, keyword_cell) in enumerate(rows):
            print("第" + str(index) + "個")
            sentence_attrs = {"name": line, "author": poet, "title": poem_title}
            CreateNode(graph, 'sentence', sentence_attrs)
            print("建立詩句:" + line + "成功!!")
            for keyword in keyword_cell.split(','):
                word_attrs = {"name": keyword}
                # Link sentence -> word, then word -> sentence.
                CreateRelationship(graph, 'sentence', sentence_attrs, 'word', word_attrs, "關鍵字")
                print("建立關係:" + line + "-關鍵字-" + keyword + "成功")
                CreateRelationship(graph, 'word', word_attrs, 'sentence', sentence_attrs, "詩句")
                print("建立關係:" + keyword + "-詩句-" + line + "成功")


if __name__ == '__main__':
    create_sentence()
展示
詩人與朝代實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_author():
    """Create ``author`` and ``desty`` nodes from the author workbook and link them.

    Each row yields an ``author`` node, a ``desty`` node when none with that
    name exists yet, and a pair of relationships ("朝代" from author to
    dynasty, "包含" from dynasty to author).  Empty cells become "無".
    """
    sheet = pd.read_excel('./data2/author.xlsx').fillna("無")
    columns = (
        list(sheet.author), list(sheet.produce), list(sheet.num),
        list(sheet.src), list(sheet.desty), list(sheet.begin_time),
        list(sheet.end_time), list(sheet.zi), list(sheet.hao),
    )
    for index, row in enumerate(zip(*columns)):
        poet, produce, num, src, dynasty, born, died, zi, hao = row
        print("第" + str(index) + "個")
        author_attrs = {
            "name": poet, "produce": produce, "num": num, "src": src,
            "bg_time": born, "ed_time": died, "zi": zi, "hao": hao,
        }
        CreateNode(graph, 'author', author_attrs)
        print("建立詩人:" + poet + "成功!!")
        dynasty_attrs = {"name": dynasty}
        if MatchNode(graph, 'desty', dynasty_attrs) is None:
            CreateNode(graph, 'desty', dynasty_attrs)
            print("建立朝代:" + dynasty + "成功!!")
        # Link author -> dynasty, then dynasty -> author.
        CreateRelationship(graph, 'author', author_attrs, 'desty', dynasty_attrs, "朝代")
        print("建立關係:" + poet + "-所屬朝代-" + dynasty + "成功")
        CreateRelationship(graph, 'desty', dynasty_attrs, 'author', author_attrs, "包含")
        print("建立關係:" + dynasty + "-包含-" + poet + "成功")


if __name__ == '__main__':
    create_author()
展示
詩人好友關係構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Look the node up by its name property.  Property-equality matching lets
    # py2neo handle quoting, so names that contain quote characters no longer
    # break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_friend():
    """Link ``author`` nodes with "好友" relationships in both directions.

    Rows come from the ``author`` and ``friend`` columns of the friend
    workbook; ``friend`` holds comma-separated names.  Rows whose author has
    no node are skipped, as are friends without a node and self-references.
    """
    sheet = pd.read_excel('data2/friend_ming.xlsx').fillna("無")
    poets = list(sheet.author)
    friend_cells = list(sheet.friend)
    label = 'author'
    for index, poet in enumerate(poets):
        print("第" + str(index) + "個")
        poet_attrs = {"name": poet}
        if MatchNode(graph, label, poet_attrs) is None:
            continue
        for friend_name in friend_cells[index].split(','):
            friend_attrs = {"name": friend_name}
            if MatchNode(graph, label, friend_attrs) is None or friend_name == poet:
                continue
            # Friendship is stored as two directed relationships.
            CreateRelationship(graph, label, poet_attrs, label, friend_attrs, "好友")
            print("建立關係:" + poet + "-好友-" + friend_name + "成功")
            CreateRelationship(graph, label, friend_attrs, label, poet_attrs, "好友")
            print("建立關係:" + friend_name + "-好友-" + poet + "成功")


if __name__ == '__main__':
    create_friend()
展示
詩人合稱實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
def create_common_name():
    """Create ``common_name`` nodes and link each to its member ``author`` nodes.

    Rows come from the ``hc`` and ``author`` columns of the common-name
    workbook; ``author`` holds comma-separated member names.  For every
    member a "合稱" relationship (author to group) and a "包含" relationship
    (group to author) are requested.
    """
    sheet = pd.read_excel('./data2/common_name.xlsx').fillna("無")
    group_names = list(sheet.hc)
    member_cells = list(sheet.author)
    for group_name, member_cell in zip(group_names, member_cells):
        members = member_cell.split(',')
        group_attrs = {"name": group_name}
        CreateNode(graph, "common_name", group_attrs)
        print("建立合稱:" + group_name + "成功!!")
        for member in members:
            member_attrs = {"name": member}
            # Link author -> group, then group -> author.
            CreateRelationship(graph, "author", member_attrs, "common_name", group_attrs, "合稱")
            print("建立關係:" + member + "-合稱-" + group_name + "成功")
            CreateRelationship(graph, "common_name", group_attrs, "author", member_attrs, "包含")
            print("建立關係:" + group_name + "-包含-" + member + "成功")


if __name__ == '__main__':
    create_common_name()
展示
詩人事蹟實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Look the node up by its name property.  Property-equality matching lets
    # py2neo handle quoting, so names that contain quote characters no longer
    # break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
#獲取指定資料夾下的excel
import os
def get_filename(path, filetype):
    """Collect the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the leading dot, e.g. ``'.xlsx'``.  The directory
    tree is walked recursively, but only bare file names (with extension,
    without directory) are returned.
    """
    return [
        entry
        for _root, _dirs, entries in os.walk(path)
        for entry in entries
        if os.path.splitext(entry)[1] == filetype
    ]
def read_real_where_name():
    """Return the ``gu_name`` column of ``data2/gu_jin_lng_lat.xlsx`` as a list."""
    table = pd.read_excel('data2/gu_jin_lng_lat.xlsx')
    return list(table.gu_name)
def read_where(author, file, gu_name):
    """Create ``things`` nodes from one author's workbook and link them to the author.

    A row of ``file`` is used only when at least one comma-separated entry
    of its ``wheres`` cell appears in ``gu_name`` and is not "無".  For such a
    row an event node is created and a "事蹟" relationship is requested from
    the author node named ``author``.

    Relies on the module-level ``graph``, ``author_label`` and
    ``things_label`` names being defined before it is called.
    """
    sheet = pd.read_excel(file)
    dates = list(sheet.data)
    where_cells = list(sheet.wheres)
    events = list(sheet.things)
    for index, when in enumerate(dates):
        where_cell = where_cells[index]
        # Keep the row only if one of its places is a known ancient place name.
        known_place = any(
            place in gu_name and place != '無'
            for place in where_cell.split(',')
        )
        if not known_place:
            continue
        event = events[index]
        event_attrs = {"name": event, "date": when, "where_name": where_cell}
        CreateNode(graph, things_label, event_attrs)
        print("建立事件:" + event + "-成功!!")
        author_attrs = {"name": author}
        CreateRelationship(graph, author_label, author_attrs, things_label, event_attrs, "事蹟")
        print("建立關係:" + author + "-事蹟-" + event + "-成功")
if __name__ == '__main__':
    # Script entry point: process every author workbook under ``author/``.
    file = 'author/'
    lists = get_filename(file, '.xlsx')
    gu_name = read_real_where_name()
    # These labels are module globals because read_where() reads them directly.
    author_label, things_label = 'author', 'things'
    for workbook in lists:
        newfile = file + workbook
        print(newfile)
        # The author name is the part of the file name before the first dot.
        author = workbook.partition('.')[0]
        print(author)
        read_where(author, newfile, gu_name)
展示
詩人軌跡地點實體構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Look the node up by its name property.  Property-equality matching lets
    # py2neo handle quoting, so names that contain quote characters no longer
    # break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
#獲取指定資料夾下的excel
import os
def get_filename(path, filetype):
    """Collect the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the leading dot, e.g. ``'.xlsx'``.  The directory
    tree is walked recursively, but only bare file names (with extension,
    without directory) are returned.
    """
    return [
        entry
        for _root, _dirs, entries in os.walk(path)
        for entry in entries
        if os.path.splitext(entry)[1] == filetype
    ]
def read_real_where_name():
    """Return the ``gu_name`` column of ``data2/gu_jin_lng_lat.xlsx`` as a list."""
    table = pd.read_excel('data2/gu_jin_lng_lat.xlsx')
    return list(table.gu_name)
def read_gu_dict():
    """Map each ``gu_name`` in ``data2/gu_jin_lng_lat.xlsx`` to its row data.

    Each value is a dict with ``jin_name``, ``lng`` and ``lat`` keys taken
    from the same row; later rows overwrite earlier ones with the same name.
    """
    table = pd.read_excel('data2/gu_jin_lng_lat.xlsx')
    columns = zip(
        list(table.gu_name), list(table.jin_name),
        list(table.lng), list(table.lat),
    )
    return {
        gu: {"jin_name": jin, "lng": lng, "lat": lat}
        for gu, jin, lng, lat in columns
    }
def read_where(author, file, gu_name):
    """Create ``where_name`` nodes for the places in one author's workbook and link them.

    Every comma-separated entry of the ``wheres`` column that appears in
    ``gu_name`` and is not "無" is collected and de-duplicated.  Each place
    becomes a node carrying its ``jin_name``, ``lng`` and ``lat`` from
    ``gu_dict``, and a "軌跡" relationship is requested from the author node.

    Relies on the module-level ``graph``, ``gu_dict``, ``author_label`` and
    ``where_name_label`` names being defined before it is called.
    """
    sheet = pd.read_excel(file)
    matched = [
        place
        for cell in sheet.wheres
        for place in cell.split(',')
        if place in gu_name and place != '無'
    ]
    # Drop repeated places; the resulting order is whatever the set yields.
    for place in list(set(matched)):
        info = gu_dict[place]
        jin = info['jin_name']
        lat = info['lat']
        lng = info['lng']
        place_attrs = {"name": place, "jin_name": jin, "lng": lng, "lat": lat}
        CreateNode(graph, where_name_label, place_attrs)
        print("建立地點:" + place + "成功!!")
        author_attrs = {"name": author}
        CreateRelationship(graph, author_label, author_attrs, where_name_label, place_attrs, "軌跡")
        print("建立關係:" + author + "-軌跡-" + place + "成功")
if __name__ == '__main__':
    # Script entry point: process every author workbook under ``author/``.
    file = 'author/'
    lists = get_filename(file, '.xlsx')
    gu_name = read_real_where_name()
    # gu_dict and the two labels are module globals because read_where()
    # reads them directly.
    gu_dict = read_gu_dict()
    author_label, where_name_label = 'author', 'where_name'
    for workbook in lists:
        newfile = file + workbook
        print(newfile)
        # The author name is the part of the file name before the first dot.
        author = workbook.partition('.')[0]
        read_where(author, newfile, gu_name)
展示
詩詞實體關係構建
import pandas as pd
import numpy as np
import re
from py2neo import Node,Relationship,Graph,NodeMatcher,RelationshipMatcher
# Create a node
def CreateNode(m_graph, m_label, m_attrs):
    """Create an ``m_label`` node from ``m_attrs`` unless its name already exists.

    ``m_attrs`` must contain a ``name`` key, which is the uniqueness key.
    Returns the new ``Node``, or ``None`` if a node with that label and
    name is already present.
    """
    # Property-equality matching lets py2neo handle quoting, so names that
    # contain quote characters no longer break a hand-built WHERE clause.
    existing = NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
    if existing is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(node)
    return node
# Look up a node
def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``.

    Uses property-equality matching instead of a concatenated WHERE string,
    so names containing quote characters are looked up correctly.
    """
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()
# Create a relationship
def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create an ``m_r_name`` relationship from node 1 to node 2.

    Both endpoints are looked up by label and ``name`` via ``MatchNode``.
    Returns ``False`` when either endpoint is missing; otherwise returns the
    created ``Relationship`` so callers can tell success from failure.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    # Write to the graph that was passed in rather than the module global.
    m_graph.create(rel)
    return rel
# Look up a relationship
def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Return a ``Relationship`` object linking two nodes found by name.

    ``m_r_name`` is a dict whose ``name`` entry is the relationship type.
    Returns ``False`` if either endpoint cannot be found.
    NOTE(review): the relationship is only constructed locally; nothing here
    checks whether it exists in the graph.
    """
    endpoints = [
        MatchNode(m_graph, label, attrs)
        for label, attrs in ((m_label1, m_attrs1), (m_label2, m_attrs2))
    ]
    if any(node is None for node in endpoints):
        return False
    head, tail = endpoints
    return Relationship(head, m_r_name['name'], tail)
def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with ``name`` (relationship type), ``value`` and
    ``danwei`` entries.  Returns ``False`` when either endpoint node is
    missing, otherwise ``None`` after merging into ``m_graph``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    props = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **props)
    # Merge into the graph that was passed in rather than the module global.
    m_graph.merge(rel)
# Update node properties
def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the node found by label and name, then push it.

    Returns ``False`` when no matching node exists, otherwise ``None``.
    """
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    # Push through the graph that was passed in rather than the module global.
    m_graph.push(node)
# Module-level Neo4j connection used by the functions in this script.
# NOTE(review): the URL and credentials are hard-coded here.
graph = Graph('http://localhost:7474',username='neo4j',password='fengge666')
#獲取指定資料夾下的excel
import os
def get_filename(path, filetype):
    """Collect the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the leading dot, e.g. ``'.xlsx'``.  The directory
    tree is walked recursively, but only bare file names (with extension,
    without directory) are returned.
    """
    return [
        entry
        for _root, _dirs, entries in os.walk(path)
        for entry in entries
        if os.path.splitext(entry)[1] == filetype
    ]
def create_poem():
    """Load every workbook under ``data/`` as ``poem`` nodes and link them to related nodes.

    For each row a ``poem`` node is created, then relationship pairs are
    requested to its tags, form, ci-pai, qu-pai, author and dynasty whenever
    the corresponding cell is not "無".  Missing ci-pai, qu-pai and dynasty
    nodes are created on demand; author links are only made when the author
    node already exists.
    """
    poem_label = 'poem'

    def link(src_label, src_attrs, rel_type, dst_label, dst_attrs, left, middle, right):
        # Request one directed relationship, then report it.
        CreateRelationship(graph, src_label, src_attrs, dst_label, dst_attrs, rel_type)
        print("建立關係:" + left + middle + right + "成功")

    def ensure(label, attrs, prefix, display):
        # Create the node only when no node with that label and name exists.
        if MatchNode(graph, label, attrs) is None:
            CreateNode(graph, label, attrs)
            print(prefix + display + "成功!!")

    folder = 'data/'
    for workbook in get_filename(folder, '.xlsx'):
        path = folder + workbook
        print(path)
        # Read the poems; empty cells become "無".
        sheet = pd.read_excel(path).fillna("無")
        columns = (
            list(sheet.title), list(sheet.desty), list(sheet.author),
            list(sheet.content), list(sheet.trans_content),
            list(sheet.background), list(sheet.tag), list(sheet.formal),
            list(sheet.data), list(sheet.ci_name), list(sheet.qu_name),
        )
        for index, row in enumerate(zip(*columns)):
            (poem_title, dynasty, poet, content, translation, background,
             tag_cell, form, date, ci_name, qu_name) = row
            print("第" + str(index) + "個")
            poem = {"name": poem_title, "content": content, "trans_content": translation,
                    "background": background, "date": date}
            CreateNode(graph, poem_label, poem)
            print("建立詩詞:" + poem_title + "成功!!")

            if tag_cell != '無':
                for tag_name in tag_cell.split(','):
                    other = {"name": tag_name}
                    link(poem_label, poem, "分類", 'tag', other, poem_title, "-所屬類別-", tag_name)
                    link('tag', other, "包含", poem_label, poem, tag_name, "-包含-", poem_title)

            if form != '無':
                other = {"name": form}
                link(poem_label, poem, "形式", 'formal', other, poem_title, "-所屬形式-", form)
                link('formal', other, "包含", poem_label, poem, form, "-包含-", poem_title)

            if ci_name != '無':
                other = {"name": ci_name}
                ensure('ci_pai', other, "建立詞牌名:", ci_name)
                link(poem_label, poem, "詞牌名", 'ci_pai', other, poem_title, "-詞牌名-", ci_name)
                link('ci_pai', other, "包含", poem_label, poem, ci_name, "-包含-", poem_title)

            if qu_name != '無':
                other = {"name": qu_name}
                ensure('qu_pai', other, "建立曲牌名:", qu_name)
                link(poem_label, poem, "曲牌名", 'qu_pai', other, poem_title, "-曲牌名-", qu_name)
                link('qu_pai', other, "包含", poem_label, poem, qu_name, "-包含-", poem_title)

            if poet != '無':
                other = {"name": poet}
                # Author links are only made for authors that already exist.
                if MatchNode(graph, 'author', other) is not None:
                    link('author', other, "寫作", poem_label, poem, poet, "-寫作-", poem_title)
                    link(poem_label, poem, "作者", 'author', other, poem_title, "-作者-", poet)

            if dynasty != '無':
                other = {"name": dynasty}
                ensure('desty', other, "建立朝代:", dynasty)
                link(poem_label, poem, "朝代", 'desty', other, poem_title, "-所屬朝代-", dynasty)
                link('desty', other, "包含詩詞", poem_label, poem, dynasty, "-包含-", poem_title)


if __name__ == '__main__':
    create_poem()
展示
總結
實體關係構建完成,基本的古詩詞關係理清,之後可以進行相關的網頁展示。
整個關係結構圖,如下所示: