一.Neo4j簡介
1.數據構成
Neo4j使用圖相關的概念來描述數據模型,把數據保存為圖中的節點以及節點之間的關係。數據主要由三部分構成:
- 節點。節點表示對象實例,每個節點有唯一的ID區別其它節點,節點帶有屬性;
- 關係。就是圖裡面的邊,連接兩個節點,另外這裡的關係是有向的並帶有屬性;
- 屬性。key-value對,存在於節點和關係中,如圖1所示。
2.索引
Neo4j使用遍歷操作進行查詢。為了加速查詢,Neo4j會建立索引,並根據索引找到遍歷用的起始節點。
3.特點
查詢的高性能
利用圖結構進行查詢,因此效率較高
設計的靈活性
開發的敏捷性
二.導入數據
# Account / password settings
class MedicalGraph:
    """Read the disease CSV and extract the entities and relations it contains.

    The instance holds a ``Graph`` connection to ``http://localhost:7474`` and
    the path of ``DATA/disease.csv`` located next to this source file.
    """

    def __init__(self):
        ...
        # NOTE(review): the password literal means "your own" -- presumably a
        # placeholder that must be replaced with the real credential.
        self.graph = Graph("http://localhost:7474", username="neo4j", password="自己的")
        # Resolve the data path relative to this file. os.path.dirname is used
        # instead of splitting on '/' so it also works with Windows separators.
        cur_dir = os.path.dirname(os.path.abspath(__file__))
        self.data_path = os.path.join(cur_dir, 'DATA/disease.csv')

    @staticmethod
    def _is_missing(value):
        """Return True for an empty CSV cell (pandas NaN/None or an empty string)."""
        return bool(pd.isna(value)) or str(value) == ""

    # Read the file
    def read_file(self):
        """Read ``self.data_path`` and collect entities, relations and attributes.

        Columns are accessed by position, in this order: name, alias, part,
        age, infection, insurance, department, checklist, symptom,
        complication, treatment, drug, period, rate, money.

        Empty alias, part, complication and money cells are replaced by the
        single value "未知".

        :return: a 14-tuple of
            set(diseases), set(symptoms), set(aliases), set(parts),
            set(departments), set(complications), set(drugs),
            disease_to_alias, disease_to_symptom, diseases_to_part,
            disease_to_department, disease_to_complication, disease_to_drug,
            diseases_infos.
            The relation lists hold ``[disease, entity]`` pairs and
            ``diseases_infos`` holds one attribute dict per CSV row.
        """
        # Entities
        # NOTE(review): ``diseases`` is never appended to, so the first
        # returned set is always empty -- confirm whether callers rely on it.
        diseases = []  # diseases
        aliases = []  # aliases
        symptoms = []  # symptoms
        parts = []  # body parts
        departments = []  # departments
        complications = []  # complications
        drugs = []  # drugs

        # Disease attributes: name, age, infection, insurance, checklist,
        # treatment, period, rate, money
        diseases_infos = []

        # Relations
        disease_to_symptom = []  # disease -> symptom
        disease_to_alias = []  # disease -> alias
        diseases_to_part = []  # disease -> body part
        disease_to_department = []  # disease -> department
        disease_to_complication = []  # disease -> complication
        disease_to_drug = []  # disease -> drug

        # Separators that may delimit aliases; compiled once, outside the loop.
        alias_separators = re.compile("[,、;,.;]")

        all_data = pd.read_csv(self.data_path, encoding='gb18030').values
        for data in all_data:
            disease_dict = {}  # attributes of the current disease

            # Disease name
            disease = str(data[0]).replace("...", " ").strip()
            disease_dict["name"] = disease

            # Aliases: pandas yields NaN for empty cells and str(NaN) == "nan"
            # is truthy, so missing values must be detected explicitly.
            if self._is_missing(data[1]):
                alias_line = "未知"
            else:
                alias_line = alias_separators.sub(" ", str(data[1]))
            for alias in alias_line.split():
                aliases.append(alias)
                disease_to_alias.append([disease, alias])

            # Body parts: the fallback is a one-element list so the loop does
            # not iterate over the individual characters of the string.
            if self._is_missing(data[2]):
                part_list = ["未知"]
            else:
                part_list = str(data[2]).strip().split()
            for part in part_list:
                parts.append(part)
                diseases_to_part.append([disease, part])

            # Age
            disease_dict["age"] = str(data[3]).strip()

            # Infectiousness
            disease_dict["infection"] = str(data[4]).strip()

            # Medical insurance
            disease_dict["insurance"] = str(data[5]).strip()

            # Departments
            for department in str(data[6]).strip().split():
                departments.append(department)
                disease_to_department.append([disease, department])

            # Checklist
            disease_dict["checklist"] = str(data[7]).strip()

            # Symptoms. The last whitespace-separated token is dropped --
            # presumably a trailing non-data marker in the CSV; TODO confirm.
            symptom_list = str(data[8]).replace("...", " ").strip().split()[:-1]
            for symptom in symptom_list:
                symptoms.append(symptom)
                disease_to_symptom.append([disease, symptom])

            # Complications (last token dropped, as for symptoms)
            if self._is_missing(data[9]):
                complication_list = ["未知"]
            else:
                complication_list = str(data[9]).strip().split()[:-1]
            for complication in complication_list:
                complications.append(complication)
                disease_to_complication.append([disease, complication])

            # Treatment. The last four characters are dropped -- presumably a
            # fixed trailing suffix in the CSV; TODO confirm.
            disease_dict["treatment"] = str(data[10]).strip()[:-4]

            # Drugs (last token dropped, as for symptoms)
            drug_string = str(data[11]).replace("...", " ").strip()
            for drug in drug_string.split()[:-1]:
                drugs.append(drug)
                disease_to_drug.append([disease, drug])

            # Cure period
            disease_dict["period"] = str(data[12]).strip()

            # Cure rate
            disease_dict["rate"] = str(data[13]).strip()

            # Cost
            if self._is_missing(data[14]):
                disease_dict["money"] = "未知"
            else:
                disease_dict["money"] = str(data[14]).strip()

            diseases_infos.append(disease_dict)

        return set(diseases), set(symptoms), set(aliases), set(parts), set(departments), set(complications), \
            set(drugs), disease_to_alias, disease_to_symptom, diseases_to_part, disease_to_department, \
            disease_to_complication, disease_to_drug, diseases_infos