版本
Elasticsearch 5.5.0
Python 3.7
說明
用 Python 查詢 Elasticsearch 上存儲的狀態數據,再用 pandas 將查詢結果整理並導出為 Excel 文件
code
# -*- coding: utf-8 -*-
# @Time : 2019/7/22 10:41
# @Author : Skyell Wang
# @FileName: es_data_get.py
from elasticsearch import Elasticsearch
import pandas as pd
import os
def elastic_data(vin, es_client=None):
    """Look up one vehicle's ``vin``/``odometer`` record in Elasticsearch.

    Runs a ``term`` query on the ``vin`` field and returns the ``_source``
    of the first hit, restricted to the ``vin`` and ``odometer`` fields.

    :param vin: value matched exactly against the ``vin`` field.
    :param es_client: optional, already-connected client object exposing
        ``search(...)``. When omitted, a new ``Elasticsearch`` connection is
        opened for this call, so callers querying many VINs can pass one
        shared client instead.
    :return: the first hit's ``_source`` dict, with ``odometer`` set to ``0``
        when the document has no such field; ``None`` when nothing matches.
    """
    if es_client is None:
        # Connect to the ES cluster.
        es_client = Elasticsearch(["ip"],
                                  http_auth=('elastic', 'password'),
                                  port=9200)

    # Exact-match query on the VIN.
    body = {
        "query": {
            "term": {
                "vin": vin
            }
        }
    }
    # Only fetch the vin and odometer fields of each document.
    para = {
        "_source": "vin,odometer"
    }
    # NOTE(review): up to 1000 hits are requested but only the first one is
    # used below; the size is kept as in the original query.
    query = es_client.search(index='ddfsdfd', doc_type='dfsf',
                             size=1000, body=body, params=para)
    results = query['hits']['hits']

    # No document for this VIN: report it and signal "missing" explicitly.
    if not results:
        print('vin數據不存在', vin)
        return None

    source = results[0]['_source']
    # Documents without a total-mileage value get a default odometer of 0.
    source.setdefault('odometer', 0)
    print(source)
    return source
if __name__ == "__main__":
    # Script entry point: read VINs from a CSV file, look each one up in
    # Elasticsearch and write the collected records to an Excel workbook.

    # Raw string: the Windows path contains backslash sequences ("\M", "\d")
    # that are invalid escapes in an ordinary string literal.
    path = r"E:\MyCode\ML_InAction\data_analysis\data_do"
    gc5_vin = "GC5_vin.csv"
    gc5_vin_path = os.path.join(path, gc5_vin)
    excel_file_name = 'GC5_data.xlsx'
    excel_path = os.path.join(path, excel_file_name)

    # Load only the vehicle_identifier column from the CSV file.
    df_data = pd.read_csv(gc5_vin_path, usecols=['vehicle_identifier'])

    vin_data_list = []
    # Iterate over the values directly; rows with a missing identifier are
    # skipped rather than sent to Elasticsearch as NaN.
    for vin in df_data['vehicle_identifier'].dropna():
        data_gc5 = elastic_data(vin)
        # Keep only dict results; anything else (e.g. None) is dropped.
        if isinstance(data_gc5, dict):
            vin_data_list.append(data_gc5)

    # Convert to a DataFrame and save it as an Excel file.
    vin_data_df = pd.DataFrame(vin_data_list)
    vin_data_df.to_excel(excel_path, index=False)
    print("任務已完成!")