python解析pb二進制文件,寫入明文文本
背景:在項目中需要解析pb(protobuf)二進制文件,將其轉為明文後寫入txt文本保存,同時轉換為以分隔符分隔的行列結構(每條記錄一行,字段之間用分隔符隔開)。在這裡記錄一下這個過程。以下列出了兩種方法:方法二存在bytes類型字段解析失敗的問題,因此採用方法一。
主要由以下四步組成:
1、二進制數據反序列化
2、反序列化數據寫入臨時文件 temp
3、讀取臨時文件,轉換為標準行列結構,寫入明文文件
4、刪除臨時文件
"""
file_src:pb二進制文件
file_des:txt明文文件
"""
def pb_2_txt(self, binary_conf, file_src, file_des):
    """Convert a serialized protobuf file into delimiter-separated text rows.

    Steps:
      1. Deserialize the binary file into the configured message object.
      2. Dump the message's text form (``str(message)``) to ``file_des + '.temp'``.
      3. Read the temp file back, turn every ``name { ... }`` group into one
         row whose field values are joined with ``binary_conf['split']``, and
         hand the rows to ``self.write_all(file_des, rows)``.
      4. Remove the temp file (this now happens even when step 2 or 3 fails).

    Args:
        binary_conf: Mapping with the keys
            ``'message'``      - non-empty expression that is ``eval``-ed and
                                 must yield an object with ``ParseFromString``;
            ``'message_name'`` - must be non-empty (only validated here);
            ``'split'``        - separator placed between the values of a row.
        file_src: Path of the binary protobuf file to read.
        file_des: Path of the plain-text file to produce.

    Raises:
        SystemExit: with status 1 on any failure, after printing a warning
            through ``print_utils.print_warning``.

    Limitations kept from the original implementation: every closing brace
    ends a row, so nested messages are split into separate rows; scalar
    fields that are not followed by any closing brace are not emitted.
    """
    # sys.exit is used instead of the site-provided exit(), which is not
    # guaranteed to exist (e.g. when Python runs with -S).
    import sys

    # Module generated from the .proto file. It looks unused, but it must be
    # in local scope so the expression in binary_conf['message'] can resolve.
    import pb2  # noqa: F401

    def _fatal(template, detail):
        """Print the warning built from template/detail and exit with 1."""
        print_utils.print_warning(template % (detail,))
        sys.exit(1)

    message_expr = binary_conf['message']
    if not message_expr:
        # The original printed a literal '%s' here; supply the value.
        _fatal('[FATAL] message not found: %s, quit', message_expr)
    try:
        # NOTE(security): eval() runs arbitrary code. binary_conf must come
        # from a trusted source; never feed user-controlled text in here.
        pb_message = eval(message_expr)
    except NameError:
        _fatal('[FATAL] pb name not found: %s, quit', message_expr)
    except AttributeError:
        _fatal('[FATAL] pb attribute not found: %s, quit', message_expr)

    if not binary_conf['message_name']:
        _fatal('[FATAL] message_name not found: %s, quit',
               binary_conf['message_name'])

    # 1. Deserialize the binary payload.
    try:
        with open(file_src, 'rb') as bf:
            pb_message.ParseFromString(bf.read())
    except Exception:
        traceback.print_exc()
        _fatal('[FATAL] ParseFromString fail: %s, quit', message_expr)

    temp_path = file_des + '.temp'
    try:
        # 2. Dump the text form of the message into the temp file.
        try:
            with open(temp_path, 'w') as tf:
                tf.write(str(pb_message))
        except Exception:
            traceback.print_exc()
            _fatal('[FATAL] write temp file fail: %s, quit', message_expr)

        # 3. Re-read the dump and fold each brace-delimited group into a row.
        try:
            separator = binary_conf['split']
            rows = []
            fields = []
            with open(temp_path, 'r') as tf:
                for raw_line in tf:
                    stripped = raw_line.strip()
                    if not stripped:
                        continue
                    # Look at the line's shape rather than searching for a
                    # brace anywhere, so values that merely contain '{' or
                    # '}' are not mistaken for group boundaries.
                    if stripped.endswith('{'):
                        continue  # group header, e.g. "coach_lines {"
                    if stripped == '}':
                        rows.append(separator.join(fields))
                        fields = []
                        continue
                    # Split on the first ': ' only, so the value survives
                    # intact even when it contains ': ' itself.
                    _, found, value = stripped.partition(': ')
                    if not found:
                        raise ValueError(
                            'unexpected line in text dump: %r' % (stripped,))
                    fields.append(value)
            self.write_all(file_des, rows)
        except Exception:
            traceback.print_exc()
            _fatal('[FATAL] write txt file fail: %s, quit', message_expr)
    finally:
        # 4. Always clean up, including when a step above exits early.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                _fatal('[WARNING] remove temp file fail: %s, quit',
                       message_expr)
# 方法二:pb轉dict,有問題:bytes類型的數據protobuf_to_dict轉換有問題
# try:
# with open(file_src, 'rb') as bf:
# # 二進制文件數據
# binary_data = bf.read()
# # 反序列化
# pb_message.ParseFromString(binary_data)
# # pb轉dict
# dict_data = protobuf_to_dict(pb_message)
# # 處理dict,寫入明文文件中
# coach_graphs = dict_data[message_name]
# for graphDic in coach_graphs:
# # print(graphDic)
# # exit(1)
# message = []
# # 處理common字段
# if len(binary_conf['common']) > 0:
# count = 0
# for common_field in binary_conf['common']:
# if count > 10:
# exit(1)
# msg_type = common_field.split(' ')[0]
# msg_content = graphDic[common_field.split(' ')[1]]
# # # bytes字段轉為字符串
# if msg_type == 'bytes':
# print('------')
# print(("b'" + msg_content).decode())
# exit(1)
# message.append(str(msg_content.decode("utf-8")))
# # message.append(str(msg_content.decode("utf-8").decode('gbk').encode('utf-8')))
# else:
# message.append(str(msg_content))
# # print(msg_type)
# # print(msg_content)
# count += 1
# # 處理repeated字段
# if len(binary_conf['repeated']) > 0:
# for common_field in binary_conf['repeated']:
# for line_sid in graphDic[common_field.split(' ')[1]]:
# # message.append(str(line_sid.decode("gbk"))) # aaa.decode("gbk")
# message.append(str(line_sid))
# mesasge_list.append(binary_conf['split'].join(message))
# self.write_all(file_des, mesasge_list)
# except Exception as e:
# traceback.print_exc()
# print_utils.print_warning('[FATAL] ParseFromString fail: %s, quit' % binary_conf['message'])
# exit(1)