中华古诗词知识图谱之实体关系构建&导入neo4j数据库
2022/1/2 19:08:34
本文主要是介绍中华古诗词知识图谱之实体关系构建&导入neo4j数据库,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
实体分析
诗名实体
属性
包含:作诗时间,诗名,内容,翻译,背景。
关系
实体1 | 关系 | 实体2 |
诗名 | 形式 | 诗词形式 |
诗名 | 作者 | 诗人 |
诗名 | 分类 | 类别 |
诗名 | 词牌名 | 词牌名 |
诗名 | 曲牌名 | 曲牌名 |
诗名 | 朝代 | 朝代 |
诗人实体
属性
包含:出生时间,头像链接,去世时间,诗词数量,字,号,名字,简介。
关系
实体1 | 关系 | 实体2 |
诗人 | 好友 | 诗人 |
诗人 | 合称 | 诗人合称 |
诗人 | 轨迹 | 地点 |
诗人 | 写作 | 诗名 |
诗人 | 朝代 | 朝代 |
朝代实体
属性
包含:朝代名称(唐宋元明清)
关系
实体1 | 关系 | 实体2 |
朝代 | 包含 | 诗人 |
朝代 | 包含诗词 | 诗名 |
类别实体
属性
包含:类别名称(写景,抒怀,。。。)
关系
实体1 | 关系 | 实体2 |
类别 | 包含 | 诗名 |
诗词形式实体
属性
包含:诗词形式名称(五言律诗,五言绝句,五言,七言律诗,七言绝句,七言)
关系
实体1 | 关系 | 实体2 |
诗词形式 | 包含 | 诗名 |
词牌名实体
属性
包含:词牌名名称
关系
实体1 | 关系 | 实体2 |
词牌名 | 包含 | 诗名 |
曲牌名实体
属性
包含:曲牌名名称
关系
实体1 | 关系 | 实体2 |
曲牌名 | 包含 | 诗名 |
诗人合称实体
属性
包含:诗人合称名称
关系
实体1 | 关系 | 实体2 |
诗人合称 | 包含 | 诗人 |
地点实体
属性
包含:古代地点名称,经纬度,现今名称
事件实体
属性
包含:时间,事件名称,地点
诗句实体与关键字实体
属性
数据内容,关键字内容
关系
实体1 | 关系 | 实体2 |
诗句 | 关键字 | 字 |
字 | 诗句 | 诗句 |
实体构建
构建顺序
构建的原则:
先构建不易发生多种关系的单个实体,再构建多关系实体。
例如:类别,诗词形式,词牌名,曲牌名,朝代,诗人合称,轨迹,事件
多关系实体:诗人,诗名
类别实体构建
create_tag.py
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_tag():
    """Create one ``tag`` node per value in the ``tag`` column of tag_name.xlsx."""
    file = './data2/tag_name.xlsx'
    data = pd.read_excel(file).fillna("无")
    tag_label = "tag"
    for it in data["tag"]:
        CreateNode(graph, tag_label, {"name": it})
        print("创建诗词分类:" + it + "成功!!")


if __name__ == '__main__':
    create_tag()
展示
诗词形式实体构建
create_formal.py
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_formal():
    """Create one ``formal`` node for each of the six fixed poem forms."""
    formal = ['七言', '五言', '七言律诗', '七言绝句', '五言律诗', '五言绝句']
    formal_label = "formal"
    for it in formal:
        CreateNode(graph, formal_label, {"name": it})
        print("创建诗词形式:" + it + "成功!!")


if __name__ == '__main__':
    create_formal()
展示
词牌名与曲牌名实体构建
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_pai_name():
    """Create ``ci_pai`` and ``qu_pai`` nodes from their two workbooks.

    Ci tune names come from the ``title`` column of cipai_name.xlsx and qu
    tune names from the ``qu_name`` column of qupai_name.xlsx.
    """
    file = './data2/cipai_name.xlsx'
    data = pd.read_excel(file).fillna("无")
    cipai_label = "ci_pai"
    for it in data["title"]:
        CreateNode(graph, cipai_label, {"name": it})
        print("创建词牌名" + it + "成功!!")

    file2 = './data2/qupai_name.xlsx'
    data2 = pd.read_excel(file2).fillna("无")
    qupai_label = "qu_pai"
    for it in data2["qu_name"]:
        CreateNode(graph, qupai_label, {"name": it})
        print("创建曲牌名" + it + "成功!!")


if __name__ == '__main__':
    create_pai_name()
展示
飞花令关键字实体构建
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_word():
    """Create one ``word`` node per value in the ``word`` column of word.xlsx."""
    file = './data2/word.xlsx'
    data = pd.read_excel(file).fillna("无")
    word_label = "word"
    for it in data["word"]:
        CreateNode(graph, word_label, {"name": it})
        print("创建飞花令:" + it + "成功!!")


if __name__ == '__main__':
    create_word()
展示
诗句实体构建
import os

import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def get_filename(path, filetype):
    """Return the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the dot, e.g. ``'.xlsx'``.
    """
    # NOTE: os.walk also descends into sub-directories, but only base names
    # are returned, so callers joining them onto ``path`` assume a flat folder.
    name = []
    for root, dirs, files in os.walk(path):
        name.extend(i for i in files if os.path.splitext(i)[1] == filetype)
    return name


def create_sentence():
    """Import sentence nodes and their keyword links from ``sentences/*.xlsx``.

    Each row yields a ``sentence`` node (name, author, title) plus a pair of
    relationships, ``关键字`` and ``诗句``, to every comma-separated keyword in
    the ``word`` column. Keywords without an existing ``word`` node are not
    linked, because ``CreateRelationship`` needs both end nodes.
    """
    file = 'sentences/'
    sentence_label = 'sentence'
    word_label = 'word'
    for fname in get_filename(file, '.xlsx'):
        newfile = file + fname
        print(newfile)
        data = pd.read_excel(newfile).fillna("无")
        sentens = list(data["sentens"])
        author = list(data["author"])
        title = list(data["title"])
        keys = list(data["word"])
        # At most 50000 sentences are imported from each workbook.
        lenth = min(len(sentens), 50000)
        for i in range(lenth):
            print("第" + str(i) + "个")
            attr1 = {"name": sentens[i], "author": author[i], "title": title[i]}
            CreateNode(graph, sentence_label, attr1)
            print("创建诗句:" + sentens[i] + "成功!!")
            for word in keys[i].split(','):
                attr2 = {"name": word}
                CreateRelationship(graph, sentence_label, attr1, word_label, attr2, "关键字")
                print("创建关系:" + sentens[i] + "-关键字-" + word + "成功")
                CreateRelationship(graph, word_label, attr2, sentence_label, attr1, "诗句")
                print("创建关系:" + word + "-诗句-" + sentens[i] + "成功")


if __name__ == '__main__':
    create_sentence()
展示
诗人与朝代实体构建
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_author():
    """Import poets and their dynasties from author.xlsx.

    Each row yields an ``author`` node, a ``desty`` (dynasty) node when it is
    not present yet, and the relationship pair ``朝代`` (author to dynasty)
    and ``包含`` (dynasty to author).
    """
    file = './data2/author.xlsx'
    data = pd.read_excel(file).fillna("无")
    author = list(data["author"])
    produce = list(data["produce"])
    num = list(data["num"])
    src = list(data["src"])
    desty = list(data["desty"])
    bg_time = list(data["begin_time"])
    ed_time = list(data["end_time"])
    zi_list = list(data["zi"])
    hao_list = list(data["hao"])
    author_label = 'author'
    desty_label = 'desty'
    for i in range(len(author)):
        print("第" + str(i) + "个")
        attr1 = {
            "name": author[i],
            "produce": produce[i],
            "num": num[i],
            "src": src[i],
            "bg_time": bg_time[i],
            "ed_time": ed_time[i],
            "zi": zi_list[i],
            "hao": hao_list[i],
        }
        CreateNode(graph, author_label, attr1)
        print("创建诗人:" + author[i] + "成功!!")

        attr2 = {"name": desty[i]}
        # CreateNode already skips existing dynasties; it returns a node only
        # when one was actually created.
        if CreateNode(graph, desty_label, attr2) is not None:
            print("创建朝代:" + desty[i] + "成功!!")

        CreateRelationship(graph, author_label, attr1, desty_label, attr2, "朝代")
        print("创建关系:" + author[i] + "-所属朝代-" + desty[i] + "成功")
        CreateRelationship(graph, desty_label, attr2, author_label, attr1, "包含")
        print("创建关系:" + desty[i] + "-包含-" + author[i] + "成功")


if __name__ == '__main__':
    create_author()
展示
诗人好友关系构建
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_friend():
    """Link poets listed as friends in friend_ming.xlsx with ``好友`` relationships.

    The ``friend`` column holds comma-separated names. A pair is linked in
    both directions only when both poets already exist as ``author`` nodes
    and the friend is not the poet themself.
    """
    file = 'data2/friend_ming.xlsx'
    data = pd.read_excel(file).fillna("无")
    author = list(data["author"])
    friend = list(data["friend"])
    author_label = 'author'
    for i in range(len(author)):
        print("第" + str(i) + "个")
        attr1 = {"name": author[i]}
        if MatchNode(graph, author_label, attr1) is None:
            continue
        for it in friend[i].split(','):
            attr2 = {"name": it}
            if MatchNode(graph, author_label, attr2) is None or it == author[i]:
                continue
            CreateRelationship(graph, author_label, attr1, author_label, attr2, "好友")
            print("创建关系:" + author[i] + "-好友-" + it + "成功")
            CreateRelationship(graph, author_label, attr2, author_label, attr1, "好友")
            print("创建关系:" + it + "-好友-" + author[i] + "成功")


if __name__ == '__main__':
    create_friend()
展示
诗人合称实体构建
import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def create_common_name():
    """Import poet group names from common_name.xlsx and link their members.

    Each row yields a ``common_name`` node from the ``hc`` column. Every
    comma-separated poet in the ``author`` column is linked to it with ``合称``
    (poet to group) and ``包含`` (group to poet).
    """
    file = './data2/common_name.xlsx'
    data = pd.read_excel(file).fillna("无")
    common_name_label = "common_name"
    author_label = "author"
    for common_name, members in zip(data["hc"], data["author"]):
        attr1 = {"name": common_name}
        CreateNode(graph, common_name_label, attr1)
        print("创建合称:" + common_name + "成功!!")
        for it in members.split(','):
            attr2 = {"name": it}
            CreateRelationship(graph, author_label, attr2, common_name_label, attr1, "合称")
            print("创建关系:" + it + "-合称-" + common_name + "成功")
            CreateRelationship(graph, common_name_label, attr1, author_label, attr2, "包含")
            print("创建关系:" + common_name + "-包含-" + it + "成功")


if __name__ == '__main__':
    create_common_name()
展示
诗人事迹实体构建
import os

import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')

# Labels are defined at module level so read_where() also works when this
# file is imported instead of being run as a script.
author_label = 'author'
things_label = 'things'


def get_filename(path, filetype):
    """Return the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the dot, e.g. ``'.xlsx'``.
    """
    # NOTE: os.walk also descends into sub-directories, but only base names
    # are returned, so callers joining them onto ``path`` assume a flat folder.
    name = []
    for root, dirs, files in os.walk(path):
        name.extend(i for i in files if os.path.splitext(i)[1] == filetype)
    return name


def read_real_where_name():
    """Return the ``gu_name`` (ancient place name) column of gu_jin_lng_lat.xlsx."""
    file = 'data2/gu_jin_lng_lat.xlsx'
    data = pd.read_excel(file)
    return list(data["gu_name"])


def read_where(author, file, gu_name):
    """Create event nodes for one poet and link them with ``事迹``.

    Args:
        author: Poet name; must match an existing ``author`` node for the
            relationship to be created.
        file: Path of the poet's workbook, with columns ``data`` (date),
            ``wheres`` (comma-separated places) and ``things`` (event text).
        gu_name: Collection of known ancient place names. Only events with
            at least one place in this collection are imported.
    """
    data = pd.read_excel(file)
    date = list(data["data"])
    where_name = list(data["wheres"])
    things = list(data["things"])
    known = set(gu_name)  # O(1) membership instead of scanning a list
    for i in range(len(date)):
        # Empty cells are read as NaN (a float); they carry no place to check.
        if not isinstance(where_name[i], str):
            continue
        places = where_name[i].split(',')
        if not any(it in known and it != '无' for it in places):
            continue
        # One event node per row, however many of its places are known.
        attr1 = {"name": things[i], "date": date[i], "where_name": where_name[i]}
        CreateNode(graph, things_label, attr1)
        print("创建事件:" + things[i] + "-成功!!")
        attr2 = {"name": author}
        CreateRelationship(graph, author_label, attr2, things_label, attr1, "事迹")
        print("创建关系:" + author + "-事迹-" + things[i] + "-成功")


if __name__ == '__main__':
    file = 'author/'
    lists = get_filename(file, '.xlsx')
    gu_name = read_real_where_name()
    for it in lists:
        newfile = file + it
        print(newfile)
        # splitext keeps dots that are part of the poet's file name.
        author = os.path.splitext(it)[0]
        print(author)
        read_where(author, newfile, gu_name)
展示
诗人轨迹地点实体构建
import os

import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')

# Labels and the place lookup live at module level so read_where() also works
# when this file is imported instead of being run as a script.
author_label = 'author'
where_name_label = 'where_name'
gu_dict = None  # loaded on first use by read_where(), or set by the script


def get_filename(path, filetype):
    """Return the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the dot, e.g. ``'.xlsx'``.
    """
    # NOTE: os.walk also descends into sub-directories, but only base names
    # are returned, so callers joining them onto ``path`` assume a flat folder.
    name = []
    for root, dirs, files in os.walk(path):
        name.extend(i for i in files if os.path.splitext(i)[1] == filetype)
    return name


def read_real_where_name():
    """Return the ``gu_name`` (ancient place name) column of gu_jin_lng_lat.xlsx."""
    file = 'data2/gu_jin_lng_lat.xlsx'
    data = pd.read_excel(file)
    return list(data["gu_name"])


def read_gu_dict():
    """Map each ancient place name to its ``jin_name``, ``lng`` and ``lat``."""
    file = 'data2/gu_jin_lng_lat.xlsx'
    data = pd.read_excel(file)
    return {
        gu: {"jin_name": jin, "lng": lng, "lat": lat}
        for gu, jin, lng, lat in zip(
            data["gu_name"], data["jin_name"], data["lng"], data["lat"]
        )
    }


def read_where(author, file, gu_name):
    """Create place nodes for one poet and link them with ``轨迹``.

    Args:
        author: Poet name; must match an existing ``author`` node for the
            relationship to be created.
        file: Path of the poet's workbook; its ``wheres`` column holds
            comma-separated place names.
        gu_name: Collection of known ancient place names. Only places in
            this collection are imported.
    """
    global gu_dict
    if gu_dict is None:
        gu_dict = read_gu_dict()

    data = pd.read_excel(file)
    known = set(gu_name)  # O(1) membership instead of scanning a list
    real_where = []
    for where_name in data["wheres"]:
        # Empty cells are read as NaN (a float); they carry no place to check.
        if not isinstance(where_name, str):
            continue
        real_where.extend(
            it for it in where_name.split(',') if it in known and it != '无'
        )

    # dict.fromkeys drops duplicates while keeping a stable first-seen order.
    for it in dict.fromkeys(real_where):
        place = gu_dict[it]
        attr1 = {
            "name": it,
            "jin_name": place['jin_name'],
            "lng": place['lng'],
            "lat": place['lat'],
        }
        CreateNode(graph, where_name_label, attr1)
        print("创建地点:" + it + "成功!!")
        attr2 = {"name": author}
        CreateRelationship(graph, author_label, attr2, where_name_label, attr1, "轨迹")
        print("创建关系:" + author + "-轨迹-" + it + "成功")


if __name__ == '__main__':
    file = 'author/'
    lists = get_filename(file, '.xlsx')
    gu_name = read_real_where_name()
    gu_dict = read_gu_dict()
    for it in lists:
        newfile = file + it
        print(newfile)
        # splitext keeps dots that are part of the poet's file name.
        author = os.path.splitext(it)[0]
        read_where(author, newfile, gu_name)
展示
诗词实体关系构建
import os

import pandas as pd
import numpy as np
import re
from py2neo import Node, Relationship, Graph, NodeMatcher, RelationshipMatcher


def CreateNode(m_graph, m_label, m_attrs):
    """Create a ``m_label`` node from ``m_attrs`` unless its name already exists.

    Returns the new ``Node``, or ``None`` when a node with the same label and
    ``name`` property is already stored in ``m_graph``.
    """
    if MatchNode(m_graph, m_label, m_attrs) is not None:
        return None
    node = Node(m_label, **m_attrs)
    # Write through the graph that was passed in (not the module global) and
    # return the node itself so callers can tell that something was created.
    m_graph.create(node)
    return node


def MatchNode(m_graph, m_label, m_attrs):
    """Return the first ``m_label`` node named ``m_attrs['name']``, or ``None``."""
    # Matching by keyword avoids splicing the name into a hand-built WHERE
    # string, which broke on names containing quotes and on non-string names.
    return NodeMatcher(m_graph).match(m_label, name=m_attrs['name']).first()


def CreateRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Create relationship ``m_r_name`` from node 1 to node 2.

    Returns the new ``Relationship``, or ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    rel = Relationship(start, m_r_name, end)
    m_graph.create(rel)
    return rel


def findRelationship(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Build (without saving) a ``m_r_name['name']`` relationship between two nodes.

    Returns ``False`` if either node is missing.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    return Relationship(start, m_r_name['name'], end)


def updateRelation(m_graph, m_label1, m_attrs1, m_label2, m_attrs2, m_r_name):
    """Merge a relationship carrying ``value`` and ``danwei`` properties.

    ``m_r_name`` is a dict with the keys ``name``, ``value`` and ``danwei``.
    Returns ``False`` if either node is missing, otherwise ``None``.
    """
    start = MatchNode(m_graph, m_label1, m_attrs1)
    end = MatchNode(m_graph, m_label2, m_attrs2)
    if start is None or end is None:
        return False
    print(m_r_name)
    propertyes = {'value': m_r_name['value'], 'danwei': m_r_name['danwei']}
    rel = Relationship(start, m_r_name['name'], end, **propertyes)
    m_graph.merge(rel)


def updateNode(m_graph, m_label1, m_attrs1, new_attrs):
    """Apply ``new_attrs`` to the matched node and push it; ``False`` if not found."""
    node = MatchNode(m_graph, m_label1, m_attrs1)
    if node is None:
        return False
    node.update(new_attrs)
    m_graph.push(node)


graph = Graph('http://localhost:7474', username='neo4j', password='fengge666')


def get_filename(path, filetype):
    """Return the names of files under ``path`` whose extension is ``filetype``.

    ``filetype`` includes the dot, e.g. ``'.xlsx'``.
    """
    # NOTE: os.walk also descends into sub-directories, but only base names
    # are returned, so callers joining them onto ``path`` assume a flat folder.
    name = []
    for root, dirs, files in os.walk(path):
        name.extend(i for i in files if os.path.splitext(i)[1] == filetype)
    return name


def create_poem():
    """Import poems from ``data/*.xlsx`` and link them to the other entities.

    Each row yields a ``poem`` node, linked in both directions to its tags,
    form, ci/qu tune name, author and dynasty. Cells holding ``无`` (the fill
    value for missing data) are skipped. Tune-name and dynasty nodes are
    created on demand; authors are linked only when they already exist.
    """
    file = 'data/'
    poem_label = 'poem'
    author_label = 'author'
    desty_label = 'desty'
    formal_label = 'formal'
    tag_label = 'tag'
    cipai_label = 'ci_pai'
    qupai_label = 'qu_pai'
    for fname in get_filename(file, '.xlsx'):
        newfile = file + fname
        print(newfile)
        data = pd.read_excel(newfile).fillna("无")
        title = list(data["title"])
        desty = list(data["desty"])
        author = list(data["author"])
        content = list(data["content"])
        trans_content = list(data["trans_content"])
        background = list(data["background"])
        tag = list(data["tag"])
        formal = list(data["formal"])
        date = list(data["data"])
        ci_name = list(data["ci_name"])
        qu_name = list(data["qu_name"])
        for i in range(len(title)):
            print("第" + str(i) + "个")
            attr1 = {
                "name": title[i],
                "content": content[i],
                "trans_content": trans_content[i],
                "background": background[i],
                "date": date[i],
            }
            # NOTE: poems are keyed by title only, so a later poem sharing a
            # title is not created and its links attach to the first one.
            CreateNode(graph, poem_label, attr1)
            print("创建诗词:" + title[i] + "成功!!")

            if tag[i] != '无':
                for it in tag[i].split(','):
                    attr2 = {"name": it}
                    CreateRelationship(graph, poem_label, attr1, tag_label, attr2, "分类")
                    print("创建关系:" + title[i] + "-所属类别-" + it + "成功")
                    CreateRelationship(graph, tag_label, attr2, poem_label, attr1, "包含")
                    print("创建关系:" + it + "-包含-" + title[i] + "成功")

            if formal[i] != '无':
                attr2 = {"name": formal[i]}
                CreateRelationship(graph, poem_label, attr1, formal_label, attr2, "形式")
                print("创建关系:" + title[i] + "-所属形式-" + formal[i] + "成功")
                CreateRelationship(graph, formal_label, attr2, poem_label, attr1, "包含")
                print("创建关系:" + formal[i] + "-包含-" + title[i] + "成功")

            if ci_name[i] != '无':
                attr2 = {"name": ci_name[i]}
                if CreateNode(graph, cipai_label, attr2) is not None:
                    print("创建词牌名:" + ci_name[i] + "成功!!")
                CreateRelationship(graph, poem_label, attr1, cipai_label, attr2, "词牌名")
                print("创建关系:" + title[i] + "-词牌名-" + ci_name[i] + "成功")
                CreateRelationship(graph, cipai_label, attr2, poem_label, attr1, "包含")
                print("创建关系:" + ci_name[i] + "-包含-" + title[i] + "成功")

            if qu_name[i] != '无':
                attr2 = {"name": qu_name[i]}
                if CreateNode(graph, qupai_label, attr2) is not None:
                    print("创建曲牌名:" + qu_name[i] + "成功!!")
                CreateRelationship(graph, poem_label, attr1, qupai_label, attr2, "曲牌名")
                print("创建关系:" + title[i] + "-曲牌名-" + qu_name[i] + "成功")
                CreateRelationship(graph, qupai_label, attr2, poem_label, attr1, "包含")
                print("创建关系:" + qu_name[i] + "-包含-" + title[i] + "成功")

            if author[i] != '无':
                attr2 = {"name": author[i]}
                # Authors are never created here; unknown ones are skipped.
                if MatchNode(graph, author_label, attr2) is not None:
                    CreateRelationship(graph, author_label, attr2, poem_label, attr1, "写作")
                    print("创建关系:" + author[i] + "-写作-" + title[i] + "成功")
                    CreateRelationship(graph, poem_label, attr1, author_label, attr2, "作者")
                    print("创建关系:" + title[i] + "-作者-" + author[i] + "成功")

            if desty[i] != '无':
                attr2 = {"name": desty[i]}
                if CreateNode(graph, desty_label, attr2) is not None:
                    print("创建朝代:" + desty[i] + "成功!!")
                CreateRelationship(graph, poem_label, attr1, desty_label, attr2, "朝代")
                print("创建关系:" + title[i] + "-所属朝代-" + desty[i] + "成功")
                CreateRelationship(graph, desty_label, attr2, poem_label, attr1, "包含诗词")
                print("创建关系:" + desty[i] + "-包含-" + title[i] + "成功")


if __name__ == '__main__':
    create_poem()
展示
总结
实体关系构建完成,基本的古诗词关系理清,之后可以进行相关的网页展示。
整个关系结构图,如下所示:
这篇关于中华古诗词知识图谱之实体关系构建&导入neo4j数据库的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2024-11-23Springboot应用的多环境打包入门
- 2024-11-23Springboot应用的生产发布入门教程
- 2024-11-23Python编程入门指南
- 2024-11-23Java创业入门:从零开始的编程之旅
- 2024-11-23Java创业入门:新手必读的Java编程与创业指南
- 2024-11-23Java对接阿里云智能语音服务入门详解
- 2024-11-23Java对接阿里云智能语音服务入门教程
- 2024-11-23JAVA对接阿里云智能语音服务入门教程
- 2024-11-23Java副业入门:初学者的简单教程
- 2024-11-23JAVA副业入门:初学者的实战指南