医疗知识图谱问答 —— 数据同步
2023/8/4 23:52:22
本文主要是介绍医疗知识图谱问答 —— 数据同步,对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
前言
前面的文章已经介绍了 Neo4j 服务的本地安装,以及数据的增删改查操作方法。接下来进入 Python 项目,完成医疗知识图谱的构建和问答机器人的代码实现。由于篇幅较长,本文只介绍知识图谱的构建(数据同步)部分。
环境
Anaconda3
Python3.8
Py2neo (新版)
数据来源 (结构)
编码
1. 引入依赖
import json from py2neo import Graph, Node
2. 类的初始化 (连接 neo4j)
def __init__(self, data_path="./data/medical.json", uri='bolt://localhost:7687',
             auth=('neo4j', 'beiqiaosu123456')):
    """Store the data file location and open the Neo4j connection.

    Calling the constructor without arguments behaves exactly as before;
    the parameters exist so that the data file, server address and
    credentials can be overridden without editing the source.

    Args:
        data_path: Path of the line-delimited JSON file with the medical data.
        uri: Bolt URI of the Neo4j server.
        auth: ``(user, password)`` tuple passed to ``py2neo.Graph``.
    """
    self.data_path = data_path
    # NOTE(review): the default password is kept only for backward
    # compatibility; real credentials should be supplied by the caller
    # (e.g. read from configuration or the environment).
    self.neo4j = Graph(uri, auth=auth)
3. 读取数据
def read_data(self):
    """Parse ``self.data_path`` into entity sets and relation lists.

    The file is read as line-delimited JSON: one disease record per line.
    Blank lines are skipped. Only ``name`` is mandatory in a record; every
    other field is optional and treated as empty when absent.

    Returns:
        A 20-element tuple, in this order:
        diseases, symptoms, producers, departments, drugs, foods, checks
        (all ``set`` of names), disease_info (``list`` of property dicts),
        rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
        rels_noteat, rels_doeat, rels_recommendeat, rels_check,
        rels_drug_producer, rels_department, rels_category,
        rels_drug_producer (each a ``list`` of ``[start, end]`` name pairs).
        ``rels_drug_producer`` appears twice; the duplicate is kept because
        callers unpack exactly 20 values.

    Raises:
        KeyError: If a record has no ``name`` field.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # Entity name collections (de-duplicated on return).
    diseases = []
    symptoms = []
    departments = []
    drugs = []
    foods = []
    producers = []
    checks = []
    # Full property dict per disease.
    disease_info = []
    # Relations, each entry is a [start_name, end_name] pair.
    rels_symptom = []         # disease -> symptom
    rels_acompany = []        # disease -> accompanying disease/symptom
    rels_category = []        # disease -> department
    rels_department = []      # department -> department
    rels_commondrug = []      # disease -> common drug
    rels_recommenddrug = []   # disease -> recommended drug
    rels_noteat = []          # disease -> food to avoid
    rels_doeat = []           # disease -> food that may be eaten
    rels_recommendeat = []    # disease -> recommended food
    rels_check = []           # disease -> check item
    rels_drug_producer = []   # producer -> drug

    # Optional attributes copied verbatim; absent keys default to ''.
    text_fields = (
        'desc', 'prevent', 'cause', 'get_prob', 'yibao_status', 'easy_get',
        'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
    )
    # This check name is kept as an entity but left out of the relations,
    # presumably because its single quotes are unsafe in hand-built Cypher.
    excluded_check = "血清5'-核苷酸酶(5'-NT)"

    # ``with`` guarantees the file handle is closed even if parsing fails.
    with open(self.data_path, encoding="utf8", mode="r") as data_file:
        for line in data_file:
            if not line.strip():
                continue
            data_json = json.loads(line)
            disease = data_json['name']
            diseases.append(disease)

            disease_dict = {key: data_json.get(key, '') for key in text_fields}
            disease_dict['name'] = disease
            disease_dict['cure_department'] = []
            disease_info.append(disease_dict)

            symptom = data_json.get('symptom', [])
            rels_symptom.extend([disease, symptom_i] for symptom_i in symptom)
            symptoms += symptom

            # Departments: one entry means the disease's department; two
            # entries are recorded as a [first, second] department pair
            # and the disease is attached to the first one.
            if 'cure_department' in data_json:
                cure_department = data_json['cure_department']
                departments += cure_department
                if len(cure_department) == 1:
                    rels_category.append([disease, cure_department[0]])
                if len(cure_department) == 2:
                    large, small = cure_department
                    rels_department.append([large, small])
                    rels_category.append([disease, large])
                disease_dict['cure_department'] = cure_department

            # Accompanying conditions are stored in the symptom entity set.
            acompanys = data_json.get('acompany', [])
            rels_acompany.extend([disease, acompany] for acompany in acompanys)
            symptoms += acompanys

            commondrug = data_json.get('common_drug', [])
            rels_commondrug.extend([disease, drug_c] for drug_c in commondrug)
            drugs += commondrug

            recommenddrug = data_json.get('recommand_drug', [])
            rels_recommenddrug.extend(
                [disease, drug_recom] for drug_recom in recommenddrug)
            drugs += recommenddrug

            noteat = data_json.get('not_eat', [])
            rels_noteat.extend([disease, noteat_i] for noteat_i in noteat)
            foods += noteat

            doeat = data_json.get('do_eat', [])
            rels_doeat.extend([disease, doeat_i] for doeat_i in doeat)
            foods += doeat

            recommendfood = data_json.get('recommand_eat', [])
            rels_recommendeat.extend(
                [disease, food_i] for food_i in recommendfood)
            foods += recommendfood

            checkitem = data_json.get('check', [])
            rels_check.extend(
                [disease, check_i] for check_i in checkitem
                if check_i != excluded_check)
            checks += checkitem

            # Each drug_detail entry looks like "producer(drug)".
            for name in data_json.get('drug_detail', []):
                parts = name.split("(")
                producers.append(parts[0])
                rels_drug_producer.append(
                    [parts[0], parts[-1].replace(")", "")])

    return (
        set(diseases), set(symptoms), set(producers), set(departments),
        set(drugs), set(foods), set(checks), disease_info, rels_symptom,
        rels_acompany, rels_commondrug, rels_recommenddrug, rels_noteat,
        rels_doeat, rels_recommendeat, rels_check, rels_drug_producer,
        rels_department, rels_category, rels_drug_producer,
    )
4. 创建节点
def create_medical_nodes(self):
    """Create the graph nodes from the parsed data file.

    Only the fully-attributed disease nodes are created here. The
    name-only node types below are disabled; enable the ones that still
    need to be written to the database.
    """
    print("start create nodes")
    # Use this instance's own reader instead of a module-level object so
    # the method works no matter what the instance variable is called.
    (diseases, symptoms, producers, departments, drugs, foods, checks,
     disease_info) = self.read_data()[:8]
    # self.create_node('Diseases', diseases)
    # self.create_node('Symptoms', symptoms)
    # self.create_node('Departments', departments)
    # self.create_node('Drugs', drugs)
    # self.create_node('Foods', foods)
    # self.create_node('Producers', producers)
    # self.create_node('Checks', checks)
    # Disease nodes carry extra properties, so they have their own builder.
    self.create_disease_node('Diseases', disease_info)
    return


def create_node(self, label, values):
    """Create one node per name, with ``name`` as its only property.

    Args:
        label: Node label to assign.
        values: Iterable of node names (strings).

    Returns:
        The number of nodes created.
    """
    count = 0
    for val in values:
        count += 1
        print("节点: " + label + ", 名称为: " + val)
        node = Node(label, name=val)
        self.neo4j.create(node)
    return count


def create_disease_node(self, label, values):
    """Create one node per disease, copying all disease properties.

    Args:
        label: Node label to assign.
        values: Iterable of disease property dicts as built by ``read_data``.

    Returns:
        The number of nodes created.
    """
    property_keys = (
        'name', 'desc', 'prevent', 'cause', 'get_prob', 'yibao_status',
        'easy_get', 'get_way', 'cure_lasttime', 'cured_prob', 'cost_money',
        'cure_department',
    )
    count = 0
    for disease in values:
        # Count every node so the return value reflects the work done.
        count += 1
        print("节点" + label + ", 名称:" + disease['name'])
        node = Node(label, **{key: disease[key] for key in property_keys})
        self.neo4j.create(node)
    return count
5. 创建关联边
def create_medical_rels(self):
    """Create the graph relations from the parsed data file.

    Only the disease -> check relation is created here. The other
    relation types below are disabled; enable the ones that still need to
    be written to the database.
    """
    print("start create rels")
    # Use this instance's own reader instead of a module-level object so
    # the method works no matter what the instance variable is called.
    (rels_symptom, rels_acompany, rels_commondrug, rels_recommenddrug,
     rels_noteat, rels_doeat, rels_recommendeat, rels_check,
     rels_drug_producer, rels_department,
     rels_category) = self.read_data()[8:19]
    # Disease -> symptom
    # self.create_rel("Diseases", "Symptoms", rels_symptom, "has_symptoms", "疾病症状")
    # Disease -> accompanying condition
    # self.create_rel("Diseases", "Symptoms", rels_acompany, "acompany_with", "疾病并发症")
    # Disease -> department
    # self.create_rel("Diseases", "Departments", rels_category, "belongs_to", "所属科室")
    # Department -> department
    # self.create_rel("Departments", "Departments", rels_department, "belongs_to", "所属")
    # Disease -> common drug
    # self.create_rel("Diseases", "Drugs", rels_commondrug, "common_drug", "常用备药")
    # Disease -> recommended drug
    # self.create_rel("Diseases", "Drugs", rels_recommenddrug, "recommand_drug", "推荐用药")
    # Disease -> food to avoid
    # self.create_rel("Diseases", "Foods", rels_noteat, "not_eat", "忌吃")
    # Disease -> food that may be eaten
    # self.create_rel("Diseases", "Foods", rels_doeat, "do_eat", "可以吃")
    # Disease -> recommended food
    # self.create_rel("Diseases", "Foods", rels_recommendeat, "recomment_eat", "推荐吃")
    # Disease -> check item
    self.create_rel("Diseases", "Checks", rels_check, "need_check", "需要检查")
    # Producer -> drug (label must be "Drugs" to match the created nodes)
    # self.create_rel("Producers", "Drugs", rels_drug_producer, "drug_of", "生产药品")


def create_rel(self, start_node, end_node, list, rel_name, rel_attr):
    """Create a relation between every pair of already-existing nodes.

    Node names and the relation's ``name`` property are passed to Neo4j
    as query parameters, so names containing quotes or other special
    characters cannot break or alter the Cypher statement. Labels and the
    relation type cannot be parameterised in Cypher and are interpolated;
    they must be trusted identifiers.

    Args:
        start_node: Label of the start nodes.
        end_node: Label of the end nodes.
        list: Iterable of ``[start_name, end_name]`` pairs. The name
            shadows the builtin but is kept for caller compatibility.
        rel_name: Relation type.
        rel_attr: Value stored in the relation's ``name`` property.

    Returns:
        The number of pairs processed.
    """
    query = (
        f"MATCH (start:{start_node}), (end:{end_node}) "
        "WHERE start.name = $start_name AND end.name = $end_name "
        f"CREATE (start)-[rel:{rel_name} {{name: $rel_attr}}]->(end)"
    )
    count = 0
    for item in list:
        count += 1
        s = item[0]
        e = item[1]
        print("创建边:" + rel_name + ",(" + start_node + "->" + end_node
              + "),点1:" + s + "点2:" + e)
        params = {"start_name": s, "end_name": e, "rel_attr": rel_attr}
        self.neo4j.run(query, params)
    return count
6. 导出节点数据
# Export the entity names as word lists for segmentation dictionaries.
def export_data(self):
    """Write each entity name set to ``dict/<kind>.txt``, one name per line.

    The ``dict`` directory is created when missing. Names are written in
    sorted order so repeated exports produce identical files. Each file is
    written exactly once and closed before the next one is opened.
    """
    import os  # local import: this block adds the only use of ``os``

    # Use this instance's own reader instead of a module-level object.
    (diseases, symptoms, producers, departments, drugs, foods,
     checks) = self.read_data()[:7]

    os.makedirs("dict", exist_ok=True)

    exports = (
        # ("dict/diseases.txt", diseases),  # disease export is disabled
        ("dict/symptoms.txt", symptoms),
        ("dict/producers.txt", producers),
        ("dict/departments.txt", departments),
        ("dict/drugs.txt", drugs),
        ("dict/foods.txt", foods),
        ("dict/checks.txt", checks),
    )
    for path, names in exports:
        with open(path, encoding="utf-8", mode="w") as out_file:
            out_file.write("\n".join(sorted(names)))
这篇关于医疗知识图谱问答 —— 数据同步的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2025-01-11cursor试用出现:Too many free trial accounts used on this machine 的解决方法
- 2025-01-11百万架构师第十四课:源码分析:Spring 源码分析:深入分析IOC那些鲜为人知的细节|JavaGuide
- 2025-01-11不得不了解的高效AI办公工具API
- 2025-01-102025 蛇年,J 人直播带货内容审核团队必备的办公软件有哪 6 款?
- 2025-01-10高效运营背后的支柱:文档管理优化指南
- 2025-01-10年末压力山大?试试优化你的文档管理
- 2025-01-10跨部门协作中的进度追踪重要性解析
- 2025-01-10总结 JavaScript 中的变体函数调用方式
- 2025-01-10HR团队如何通过数据驱动提升管理效率?6个策略
- 2025-01-10WBS实战指南:如何一步步构建高效项目管理框架?