from py2neo import Graph,Node,Relationship
# Start the Neo4j server first: in cmd, type `neo4j.bat console` and press Enter.
import pandas as pd
# Module-level connection to the local Neo4j server. No port is passed here
# (the former http_port='7474' argument is disabled), so the driver default
# applies.
neo_graph = Graph(
    host="127.0.0.1",
    user='neo4j',
    password='xxxx',
)
# Bare expression: shows the object in an interactive session and has no
# effect when the file runs as a script.
neo_graph
from py2neo import Graph
import os
from tqdm import tqdm
import json
import datetime
class CreateKG():
    """Load a JSON-lines medical dataset into a Neo4j knowledge graph.

    Each line of the dataset is one JSON object describing a disease.
    ``init`` turns every record into a ``disease`` node, collects the
    entities the record refers to (departments, symptoms, cure ways, checks,
    drugs, crowds, foods) and the relationships from the disease to them,
    and writes everything with Cypher ``MERGE`` (match-or-create) statements.

    All values are sent to the database as query parameters, never spliced
    into the Cypher text, so quotes and backslashes in the data are stored
    verbatim and entity names always agree with the names used when the
    relationships are matched.
    """

    # Optional attributes copied onto each disease node, in property order.
    # A field missing from a record is stored as "".
    _DISEASE_FIELDS = (
        "desc", "prevent", "cause", "get_prob", "get_way",
        "cure_lasttime", "cured_prob", "cost_money",
    )

    # Labels of the de-duplicated entity sets, in the order they are written.
    _ENTITY_LABELS = (
        "department", "symptom", "cureWay", "check", "drug", "crowd", "food",
    )

    # (dataset field, target node label, relationship type), in write order.
    # A target label of "disease" means the target is another disease, so no
    # separate entity is collected for it.
    # NOTE(review): "diseaseDepartmentRelations" is the only type with a
    # trailing "s". It is kept byte-for-byte because existing graphs and
    # queries may depend on the stored name; confirm before normalising it.
    _LINKS = (
        ("cure_department", "department", "diseaseDepartmentRelations"),
        ("symptom", "symptom", "diseaseSymptomRelation"),
        ("cure_way", "cureWay", "diseaseCureWayRelation"),
        ("check", "check", "diseaseCheckRelation"),
        ("common_drug", "drug", "diseaseDrugRelation"),
        ("easy_get", "crowd", "diseaseCrowdRelation"),
        ("recommand_eat", "food", "diseaseSuitableFoodRelation"),
        ("not_eat", "food", "diseaseTabooFoodRelation"),
        ("acompany", "disease", "diseaseDiseaseRelation"),
    )

    # Fields whose value is a single target rather than a sequence of targets.
    _SCALAR_FIELDS = ("easy_get",)

    def __init__(self, kg_host, kg_port, kg_user, kg_password, data_path):
        """Validate the dataset path and connect to Neo4j.

        Args:
            kg_host: Host name or IP address of the Neo4j server.
            kg_port: Accepted for interface compatibility but not passed to
                the driver, so the driver's default port is used.
            kg_user: Neo4j user name.
            kg_password: Neo4j password.
            data_path: Path of the JSON-lines dataset file.

        Raises:
            ValueError: If ``data_path`` is empty.
            FileNotFoundError: If ``data_path`` does not exist.
        """
        # Check the cheap local preconditions before touching the database.
        # Both exception types are subclasses of Exception, so callers that
        # catch Exception keep working.
        if not data_path:
            raise ValueError("数据集地址为空")
        if not os.path.exists(data_path):
            raise FileNotFoundError("数据集不存在")
        self.graph = Graph(
            host=kg_host,
            user=kg_user,
            password=kg_password)
        self.data_path = data_path

    # Cypher reference for creating nodes and relationships by hand:
    #   CREATE (n:Person { name: 'Andres', title: 'Developer' }) return n;
    #   CREATE (n:Person { name: 'Vic', title: 'Developer' }) return n;
    #   match(n:Person{name:"Vic"}),(m:Person{name:"Andres"}) create (n)-[r:Friend]->(m) return r;
    #   match(n:Person{name:"Vic"}),(m:Person{name:"Andres"}) create (n)<-[r:Friend]-(m) return r;

    @staticmethod
    def _entity_query(label, item):
        """Build the parameterised MERGE statement for one entity.

        Properties whose value is ``None`` are left out because Neo4j cannot
        MERGE on a null property.  Parameters are named ``p0``, ``p1``, ...
        so that a property key can never clash with Cypher's parameter-name
        syntax.

        Returns:
            ``(cypher, parameters)``, or ``(None, {})`` when no property is
            left to write.
        """
        keys = [key for key, value in item.items() if value is not None]
        if not keys:
            return None, {}
        clause = ", ".join(
            "{}: $p{}".format(key, index) for index, key in enumerate(keys))
        params = {
            "p{}".format(index): item[key] for index, key in enumerate(keys)}
        return "MERGE (n:{} {{{}}})".format(label, clause), params

    @staticmethod
    def _relation_query(s_label, e_label, label):
        """Build the parameterised statement linking two nodes by name."""
        return (
            "MATCH (p:{}), (q:{}) "
            "WHERE p.name = $s_name AND q.name = $e_name "
            "MERGE (p)-[r:{}]->(q)"
        ).format(s_label, e_label, label)

    def saveEntity(self, label, data):
        """MERGE one node per item of ``data`` under ``label``.

        Args:
            label: Node label (spliced into the query; Cypher cannot
                parameterise labels, so it must be a trusted identifier).
            data: Iterable of ``{property: value}`` dicts.

        Writing is best-effort: a failing item is reported and the import
        continues with the next one.
        """
        print("\n写入实体:", label)
        for item in tqdm(data, ncols=80):
            try:
                cql, params = self._entity_query(label, item)
                if cql is None:
                    continue
                self.graph.run(cql, params)
            except Exception as e:
                # tqdm.write prints without corrupting the progress bar.
                tqdm.write("写入实体失败 [{}]: {}".format(label, e))

    def saveRelation(self, s_label, e_label, label, data):
        """MERGE a ``label`` relationship for every pair in ``data``.

        Args:
            s_label: Label of the start node.
            e_label: Label of the end node.
            label: Relationship type.
            data: Iterable of dicts with ``"s_name"`` and ``"e_name"`` keys,
                matched against the ``name`` property of the nodes.

        Writing is best-effort: a failing item is reported and the import
        continues with the next one.
        """
        print("\n写入关系:", label)
        # The statement text is identical for every pair; only the
        # parameters change, so build it once.
        cql = self._relation_query(s_label, e_label, label)
        for item in tqdm(data, ncols=80):
            try:
                params = {"s_name": item["s_name"], "e_name": item["e_name"]}
                self.graph.run(cql, params)
            except Exception as e:
                tqdm.write("写入关系失败 [{}]: {}".format(label, e))

    def getValue(self, key, data):
        """Return ``data[key]``, or ``""`` when the key is absent."""
        return data.get(key, "")

    def _extract(self, lines):
        """Parse dataset lines into diseases, entities and relationships.

        Args:
            lines: Iterable of JSON strings, one disease record each.  Blank
                lines are skipped.

        Returns:
            A ``(diseases, entities, relations)`` tuple where ``diseases`` is
            a list of property dicts (one per record), ``entities`` maps each
            label of ``_ENTITY_LABELS`` to ``{name: {"name": name}}`` and
            ``relations`` maps each relationship type of ``_LINKS`` to a list
            of ``{"s_name": ..., "e_name": ...}`` dicts.

        Raises:
            KeyError: If a record has no ``"name"``.
            ValueError: If a non-blank line is not valid JSON.
        """
        diseases = []
        # Keying by name gives O(1) de-duplication; names must therefore be
        # hashable (presumably always strings — TODO confirm against data).
        entities = {label: {} for label in self._ENTITY_LABELS}
        relations = {rel_type: [] for _, _, rel_type in self._LINKS}

        for line in lines:
            if not line.strip():
                continue
            data = json.loads(line)
            name = data["name"]

            disease = {"name": name}
            for field in self._DISEASE_FIELDS:
                disease[field] = self.getValue(field, data)
            diseases.append(disease)

            for field, target_label, rel_type in self._LINKS:
                if field not in data:
                    continue
                value = data[field]
                targets = [value] if field in self._SCALAR_FIELDS else value
                # None for "disease": those nodes come from the records.
                store = entities.get(target_label)
                for target in targets:
                    relations[rel_type].append(
                        {"s_name": name, "e_name": target})
                    if store is not None:
                        store.setdefault(target, {"name": target})
        return diseases, entities, relations

    def init(self):
        """Read the dataset and write all entities, then all relationships."""
        print("====数据抽取======")
        with open(self.data_path, 'r', encoding='utf8') as f:
            # readlines() lets tqdm know the total and show a real progress bar.
            diseases, entities, relations = self._extract(
                tqdm(f.readlines(), ncols=80))

        # Nodes first, so every relationship can find both of its endpoints.
        self.saveEntity("disease", diseases)
        for label in self._ENTITY_LABELS:
            self.saveEntity(label, list(entities[label].values()))

        for _, target_label, rel_type in self._LINKS:
            self.saveRelation(
                "disease", target_label, rel_type, relations[rel_type])
if __name__ == '__main__':
    # Script entry point: build the knowledge graph and report elapsed time.
    # Connection settings and the dataset location can be overridden with
    # environment variables; each falls back to the original hard-coded value
    # so running the script without any configuration behaves as before.
    start = datetime.datetime.now()
    kg_host = os.environ.get("NEO4J_HOST", "127.0.0.1")
    # Forwarded to CreateKG, whose constructor does not pass it to the driver.
    kg_port = 7474
    kg_user = os.environ.get("NEO4J_USER", "neo4j")
    # NOTE(review): the fallback below is a real-looking credential committed
    # to source. Rotate it and remove the default once NEO4J_PASSWORD is set
    # everywhere this script runs.
    kg_password = os.environ.get("NEO4J_PASSWORD", "960418.hmx")
    data_path = os.environ.get("KG_DATA_PATH", "dataset/知识图谱/medical.json")
    kg = CreateKG(kg_host, kg_port, kg_user, kg_password, data_path)
    kg.init()
    end = datetime.datetime.now()
    print("共耗时:{}".format(end - start))
# More video tutorials on "Programming: building a medical Q&A system with a knowledge graph": www.yxfzedu.com