Outline
用jupyter處理數據時,需要循環打印多個DataFrame,但打印出來的df看起來很難看;
想要的效果是比較規整美觀的df展示,例如單獨展示df時那樣,是一個完整的table視圖。
下面美化下jupyter notebook中for循環輸出DataFrame
解決
解決前
for 循環打印DataFrame效果:
看起來很不舒服
解決後
for 循環打印DataFrame效果:
這下就舒服多了
解決方法
通過 from IPython.display import display 導入 IPython 的 display 函數
在代碼中導入此模塊,並把 print(df) 替換為 display(df)
"""Flatten an XML file into pandas DataFrames and show them in Jupyter.

Every element reachable from the ``BGSInformation`` nodes is turned into a
``{tag: attributes + text}`` record. Records are then grouped by tag and each
group is rendered with ``display`` so that DataFrames emitted inside a
``for`` loop are shown through IPython's display machinery instead of
``print``.
"""
from collections import Counter
import xml.etree.ElementTree as ET

try:
    from IPython.display import display
except ImportError:
    # No IPython available: degrade to plain-text output.
    display = print

try:
    # Top-level location of json_normalize (pandas >= 1.0).
    from pandas import json_normalize
except ImportError:
    # Older pandas releases only expose it under pandas.io.json.
    from pandas.io.json import json_normalize

# Path of the XML file to parse.
path = '/data1/gaochao/refinitiv_sample/BGSBS02/BGSBS_03BDC.xml'

# Flat list of ``{tag: {attribute..., 'text': ...}}`` records, filled by
# get_xml_content() and read by parse_data().
ret = []


def get_xml_content(iter_root):
    """Recursively append one record per element to the module-level ``ret``.

    Args:
        iter_root: Iterable of ``xml.etree.ElementTree.Element`` objects
            (an element itself iterates over its direct children).

    An element is skipped when it has no attributes and its text is exactly
    ``'\\n'``; its children are still visited.
    """
    for node in iter_root:
        # Copy the attributes so adding 'text' does not mutate the XML tree.
        record = dict(node.attrib)
        text = node.text
        # NOTE(review): only a bare '\n' counts as "empty" here; indented
        # whitespace or a None text still produces a record - confirm intent.
        if record or text != '\n':
            record['text'] = text
            ret.append({node.tag: record})
        # NOTE(review): if iter_root comes from root.iter(tag) and elements
        # with that tag are nested, the inner ones are visited twice.
        get_xml_content(node)


def parse_data():
    """Group the records in ``ret`` by tag name.

    Returns:
        A list of single-key dicts. Tags that occur once come first, in
        document order, as ``{tag: record}``. Tags that occur several times
        follow, in order of first appearance, as ``{tag: [record, ...]}``.
    """
    # How many times each tag name occurs.
    tag_counts = Counter(tag for entry in ret for tag in entry)

    # Tags that occur exactly once are passed through unchanged.
    singles = [
        entry for entry in ret if tag_counts[next(iter(entry))] == 1
    ]

    # Collect the records of every repeated tag in a single pass.
    grouped = {}
    for entry in ret:
        tag, record = next(iter(entry.items()))
        if tag_counts[tag] != 1:
            grouped.setdefault(tag, []).append(record)

    return singles + [{tag: records} for tag, records in grouped.items()]


def get_df():
    """Build one DataFrame per tag group and render it with ``display``.

    Every column is named ``<tag>_<field>``. Returns ``None``; the frames are
    only displayed.
    """
    for item in parse_data():
        tag, value = next(iter(item.items()))
        if isinstance(value, list):
            # Repeated tag: one row per occurrence.
            df = json_normalize(item, tag)
            # Assign the columns directly; set_axis(..., inplace=True) is
            # rejected by pandas >= 2.0.
            df.columns = [tag + '_' + column for column in df.columns]
        else:
            # Unique tag: a single-row frame.
            df = json_normalize({tag + '_' + key: value[key] for key in value})
        display(df)


if __name__ == "__main__":
    # True both when run as a script and inside a Jupyter cell; importing
    # this file as a module no longer touches the hard-coded path.
    tree = ET.parse(path)
    root = tree.getroot()
    iter_root = root.iter('BGSInformation')
    get_xml_content(iter_root)
    get_df()