【知識圖譜】知識圖譜的構建-python-neo4j


環境依賴

jdk、neo4j圖數據庫
neo4j具體的安裝過程可以參考這裡:https://cloud.tencent.com/developer/article/1387732

json數據

{
	"_id": {
		"$oid": "5bb578b6831b973a137e3ee6"
	},
	"name": "肺泡蛋白質沉積症",
	"desc": "肺泡蛋白質沉積症(簡稱PAP),又稱Rosen-Castle-man-Liebow綜合征,是一種罕見疾病。該病以肺泡和細支氣管腔內充滿PAS染色陽性,來自肺的富磷脂蛋白質物質為其特征,好發於青中年,男性發病約3倍於女性。",
	"category": ["疾病百科", "內科", "呼吸內科"],
	"prevent": "1、避免感染分支桿菌病,卡氏肺囊腫肺炎,巨細胞病毒等。\n2、注意鍛煉身體,提高免疫力。",
	"cause": "病因未明,推測與幾方面因素有關:如大量粉塵吸入(鋁,二氧化硅等),機體免疫功能下降(尤其嬰幼兒),遺傳因素,酗酒,微生物感染等,而對於感染,有時很難確認是原發致病因素還是繼發於肺泡蛋白沉着症,例如巨細胞病毒,卡氏肺孢子蟲,組織胞漿菌感染等均發現有肺泡內高蛋白沉着。\n雖然啟動因素尚不明確,但基本上同意發病過程為脂質代謝障礙所致,即由於機體內,外因素作用引起肺泡表面活性物質的代謝異常,到目前為止,研究較多的有肺泡巨噬細胞活力,動物實驗證明巨噬細胞吞噬粉塵后其活力明顯下降,而病員灌洗液中的巨噬細胞內顆粒可使正常細胞活力下降,經支氣管肺泡灌洗治療后,其肺泡巨噬細胞活力可上升,而研究未發現Ⅱ型細胞生成蛋白增加,全身脂代謝也無異常,因此目前一般認為本病與清除能力下降有關。",
	"symptom": ["紫紺", "胸痛", "呼吸困難", "乏力", "毓卓"],
	"yibao_status": "否",
	"get_prob": "0.00002%",
	"get_way": "無傳染性",
	"acompany": ["多重肺部感染"],
	"cure_department": ["內科", "呼吸內科"],
	"cure_way": ["支氣管肺泡灌洗"],
	"cure_lasttime": "約3個月",
	"cured_prob": "約40%",
	"cost_money": "根據不同醫院,收費標准不一致,省市三甲醫院約( 8000——15000 元)",
	"check": ["胸部CT檢查", "肺活檢", "支氣管鏡檢查"],
	"recommand_drug": [],
	"drug_detail": []
} ......

實例

import os
import json
from py2neo import Graph, Node

class MedicalGraph:
    """Build a small medical knowledge graph in Neo4j from a JSON-lines file.

    Each non-blank line of the data file is parsed as one JSON object
    describing a disease. Diseases, drugs and departments become nodes;
    disease->drug, disease->department and department->department links
    become relationships.
    """

    def __init__(self):
        """Resolve the data file next to this script and connect to Neo4j."""
        # os.path handles the separator, so this works on Windows and POSIX
        # (splitting on '\\' by hand only worked on Windows).
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        self.data_path = os.path.join(cur_dir, 'data', 'medical2.json')
        # NOTE(review): credentials are hard-coded here; consider loading
        # them from configuration or the environment instead.
        self.g = Graph("http://localhost:7474", username="neo4j", password="rhino1qaz@wsx")

    def read_nodes(self):
        """Parse the data file into entity sets and relationship lists.

        Returns:
            A 7-tuple ``(diseases, drugs, departments, disease_infos,
            rels_disease_drug, rels_disease_department,
            rels_department_department)`` where the first three items are
            sets of names, ``disease_infos`` is a list of per-disease dicts
            and the last three items are lists of ``[start, end]`` pairs.
        """
        diseases = []  # disease names
        drugs = []  # drug names
        departments = []  # department names

        disease_infos = []

        rels_disease_drug = []  # disease -> recommended drug
        rels_disease_department = []  # disease -> treating department
        rels_department_department = []  # sub-department -> parent department

        count = 0
        # 'utf-8-sig' reads plain UTF-8 and also tolerates a leading BOM;
        # the context manager guarantees the file is closed.
        with open(self.data_path, encoding='utf-8-sig') as data_file:
            for data in data_file:
                # Skip blank lines (e.g. a trailing empty line) instead of
                # letting json.loads fail on them.
                if not data.strip():
                    continue
                disease_dict = {}
                count += 1
                print(count)
                # One JSON object per line.
                data_json = json.loads(data)
                print(data_json)
                disease = data_json['name']
                disease_dict['name'] = disease
                diseases.append(disease)
                if 'cure_department' in data_json:
                    cure_department = data_json['cure_department']
                    if len(cure_department) == 1:
                        rels_disease_department.append([disease, cure_department[0]])
                    if len(cure_department) == 2:
                        # Two entries are treated as [parent, child]: the
                        # disease is attached to the child department.
                        big = cure_department[0]
                        small = cure_department[1]
                        rels_department_department.append([small, big])
                        rels_disease_department.append([disease, small])
                    disease_dict['cure_department'] = cure_department
                    departments += cure_department
                if 'recommand_drug' in data_json:
                    recommand_drug = data_json['recommand_drug']
                    drugs += recommand_drug
                    for drug in recommand_drug:
                        rels_disease_drug.append([disease, drug])
                    disease_dict['recommand_drug'] = recommand_drug
                disease_infos.append(disease_dict)
        return set(diseases), set(drugs), set(departments), disease_infos, \
               rels_disease_drug, rels_disease_department, rels_department_department

    def create_node(self, label, nodes):
        """Create one node labelled ``label`` for every name in ``nodes``."""
        total = len(nodes)
        for count, node_name in enumerate(nodes, start=1):
            node = Node(label, name=node_name)
            self.g.create(node)
            print(count, total)
        return

    def create_diseases_nodes(self, disease_infos):
        """Create the central ``Disease`` nodes of the knowledge graph.

        Records that lack ``recommand_drug`` or ``cure_department`` get an
        empty list for that property instead of raising ``KeyError``.
        """
        for count, disease_dict in enumerate(disease_infos, start=1):
            node = Node("Disease", name=disease_dict['name'],
                        recommand_drug=disease_dict.get('recommand_drug', []),
                        cure_department=disease_dict.get('cure_department', []))
            self.g.create(node)
            print(count)
        return

    def create_graphnodes(self):
        """Create all entity nodes: diseases, drugs and departments."""
        _diseases, Drugs, Departments, disease_infos, \
            _rels_drug, _rels_department, _rels_dep_dep = self.read_nodes()
        self.create_diseases_nodes(disease_infos)
        self.create_node('Drug', Drugs)
        print(len(Drugs))
        self.create_node('Department', Departments)
        print(len(Departments))
        return

    def create_graphrels(self):
        """Create all relationship edges between the entity nodes."""
        _diseases, _drugs, _departments, _disease_infos, \
            rels_disease_drug, rels_disease_department, rels_department_department = self.read_nodes()
        self.create_relationship('Disease', 'Drug', rels_disease_drug, 'recommand_eat', '宜吃')
        self.create_relationship('Disease', 'Department', rels_disease_department, 'belongs_to', '所屬科室')
        self.create_relationship('Department', 'Department', rels_department_department, 'belongs_to', '屬於')

    def create_relationship(self, start_node, end_node, edges, rel_type, rel_name):
        """Create a ``rel_type`` relationship for every distinct edge.

        Args:
            start_node: Label of the start nodes.
            end_node: Label of the end nodes.
            edges: Iterable of ``[start_name, end_name]`` pairs; duplicates
                are collapsed.
            rel_type: Relationship type used in the Cypher pattern.
            rel_name: Value stored in the relationship's ``name`` property.

        A failure for one edge is printed and the remaining edges are still
        processed.
        """
        # Deduplicate on the pair itself; joining with a separator string
        # would break for names that contain the separator.
        unique_edges = {tuple(edge) for edge in edges}
        total = len(unique_edges)
        # Labels and relationship types cannot be Cypher parameters, so they
        # are formatted in; node names and the relationship name are passed
        # as parameters so quotes in the data cannot break the query.
        query = (
            "match(p:%s),(q:%s) where p.name=$p_name and q.name=$q_name "
            "create (p)-[rel:%s{name:$rel_name}]->(q)" % (start_node, end_node, rel_type)
        )
        count = 0
        for p_name, q_name in unique_edges:
            try:
                self.g.run(query, p_name=p_name, q_name=q_name, rel_name=rel_name)
            except Exception as e:
                # Best effort: report the failure and continue.
                print(e)
            else:
                count += 1
                print(rel_type, count, total)
        return

    def export_data(self):
        """Write disease, drug and department names to text files.

        The files ``disease.txt``, ``drug.txt`` and ``department.txt`` are
        written to the current working directory as UTF-8, one name per line.
        """
        diseases, Drugs, Departments, _disease_infos, \
            _rels_drug, _rels_department, _rels_dep_dep = self.read_nodes()
        outputs = (
            ('disease.txt', diseases),
            ('drug.txt', Drugs),
            ('department.txt', Departments),
        )
        for file_name, names in outputs:
            # Context manager closes the file even if the write fails.
            with open(file_name, 'w', encoding='utf-8') as out_file:
                out_file.write('\n'.join(names))
        return

if __name__ == '__main__':
    # Run the pipeline in order: nodes first, then the edges that connect
    # them, and finally dump the entity name lists to text files.
    builder = MedicalGraph()
    for step in (builder.create_graphnodes, builder.create_graphrels, builder.export_data):
        step()

無非就是連接圖數據庫,然後創建節點、創建關係,當做模板來看就行了,最後結果:
image


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM