博客:blog.shinelee.me | 博客園 | CSDN
這篇文章中,我們將定義一個相對複雜的數據結構,直接分析其序列化後的二進制文件。
Proto文件
編寫addressbook.proto文件,在官方例子上略作修改,增加了float
字段,以分析浮點數的存儲方式。
syntax = "proto2";
package tutorial;
// One contact entry. The field numbers below are what appear (shifted left by
// 3 and OR-ed with the wire type) as keys in the serialized bytes.
message Person {
// Contact name; proto2 `required`, field number 1.
required string name = 1;
// Numeric id; proto2 `required`, field number 2.
required int32 id = 2;
// E-mail address; may be absent.
optional string email = 3;
// Kind of phone line a PhoneNumber refers to.
enum PhoneType {
MOBILE = 0;
HOME = 1;
WORK = 2;
}
// A phone number together with its line type; nested inside Person.
message PhoneNumber {
required string number = 1;
// When `type` is not set, readers see HOME.
optional PhoneType type = 2 [default = HOME];
}
// Zero or more phone numbers for this person.
repeated PhoneNumber phones = 4;
// Float samples stored with the packed encoding. Field number 100 was chosen
// so that the key no longer fits in a single varint byte.
repeated float weight_recent_months = 100 [packed = true];
}
// Top-level container: a list of Person entries.
message AddressBook {
// Each entry is encoded as its own length-delimited embedded message.
repeated Person people = 1;
}
生成編解碼文件,addressbook.pb.cc和addressbook.pb.h。
protoc.exe addressbook.proto --cpp_out=.
序列化
編寫如下代碼,將address_book
對象序列化,保存到二進制文件address_book.bin。
// Builds an AddressBook holding one Person ("Jack", two phone numbers, three
// float samples) and writes its protobuf encoding to ./address_book.bin.
//
// Returns 0 when the file was written, 1 when the file could not be opened or
// the serialization / write failed.
int main()
{
    tutorial::AddressBook address_book;

    tutorial::Person* person = address_book.add_people();
    person->set_id(1);
    person->set_name("Jack");
    person->set_email("Jack@qq.com");

    // First phone entry: HOME.
    tutorial::Person::PhoneNumber* phone_number = person->add_phones();
    phone_number->set_number("123456");
    phone_number->set_type(tutorial::Person::HOME);

    // Second phone entry: MOBILE.
    phone_number = person->add_phones();
    phone_number->set_number("234567");
    phone_number->set_type(tutorial::Person::MOBILE);

    person->add_weight_recent_months(50);
    person->add_weight_recent_months(52);
    person->add_weight_recent_months(54);

    // Binary mode: the encoded bytes must reach the file untranslated.
    fstream fw("./address_book.bin", ios::out | ios::binary);
    if (!fw.is_open()) {
        return 1;  // could not create/open the output file
    }

    // The serializer reports failure through its bool result; do not drop it.
    const bool serialized = address_book.SerializePartialToOstream(&fw);
    fw.close();  // close() sets failbit if the final flush fails

    if (!serialized || fw.fail()) {
        return 1;
    }
    return 0;
}
二進制文件address_book.bin一共有62個字節,內容如下:
二進制文件解析
由前面的文章,每個field
的key = (field_number << 3) | wire_type
都通過varint表示。
message AddressBook的第一個字段為Person people
,Person
也是一個message,下面逐個字節地進行解析。
0a // (1 << 3) + 2,1為people的field_number,2為embedded message對應的wire type
3c // 0x3c = 60,表示接下來60個字節為Person people的數據
// 下面進入到 message Person
0a // (1 << 3) + 2,Person的第一個字段name field_number=1,2為string對應的wire type
04 // name字段的字符串長度為4
4a 61 63 6b // "Jack" 的ascii編碼
10 // (2 << 3) + 0,字段id field_number=2,0為int32對應的wire type
01 // id為1
1a // (3 << 3) + 2,字段email field_number=3,2為string對應的wire type
0b // 0x0b = 11 email字段的字符串長度為11
4a 61 63 6b 40 71 71 2e 63 6f 6d // "Jack@qq.com"
//第1個PhoneNumber,嵌套message
22 // (4 << 3) + 2,phones字段,field_number=4,2為embedded message對應的wire type
0a // 接下來10個字節為PhoneNumber的數據
0a // (1 << 3) + 2, message PhoneNumber的第一個字段number,2為string對應的wire type
06 // number字段的字符串長度為6
31 32 33 34 35 36 // "123456"
10 // (2 << 3) + 0,PhoneType type字段,0為enum對應的wire type
01 // HOME,enum被視為整數
// 第2個PhoneNumber,嵌套message
22 0a 0a 06 32 33 34 35 36 37 10 00 //信息解讀同上,最后的00為MOBILE
a2 06 // 1010 0010 0000 0110 varint方式,weight_recent_months的key
// 去掉每個字節的最高位得到 010 0010 與 000 0110;varint低位組在前(little-endian),重排為 000 0110 010 0010 = 802
// (100 << 3) + 2,100為weight_recent_months的field number
// 2為 packed repeated field的wire type
0c // 后面12個字節為packed float的數據,每4個字節一個
00 00 48 42 // float 50
00 00 50 42 // float 52
00 00 58 42 // float 54
需要注意的是,repeated後面接的字段如果是個message,比如上面的PhoneNumber,有幾個PhoneNumber,編碼時其key就會出現幾次。如果接的是數值型的字段,且以packed = true壓縮存儲時,只會出現1個key;如果不以壓縮方式存儲,其key也會出現多次。在proto3中,默認以壓縮方式進行存儲,proto2中則需要顯式地聲明。
至此,二進制文件已經分析完畢,現在再去看解碼代碼,就so easy了。
反序列化
這裡只貼上message Person對應的解碼代碼,可以看到其中遇到嵌套message PhoneNumber時,會去調用PhoneNumber的解碼代碼。
// Reads wire-format fields of a tutorial.Person from `input` and merges them
// into this object.
//
// The loop reads one tag per iteration and dispatches on its field number.
// It returns true when a zero tag is read, and false as soon as any read or
// skip step fails. Nothing in this body checks that the required fields were
// actually present.
bool Person::MergePartialFromCodedStream(
::google::protobuf::io::CodedInputStream* input) {
// DO_(expr): jump to the `failure` label when expr evaluates to false.
#define DO_(EXPRESSION) if (!PROTOBUF_PREDICT_TRUE(EXPRESSION)) goto failure
::google::protobuf::uint32 tag;
// @@protoc_insertion_point(parse_start:tutorial.Person)
for (;;) {
// p.first is the tag that was read; when p.second is false the tag is routed
// to handle_unusual instead of the per-field cases.
// NOTE(review): 16383 == 2^14 - 1, presumably the largest tag that fits in a
// two-byte varint (field 100 needs two bytes) — confirm against the
// CodedInputStream documentation.
::std::pair<::google::protobuf::uint32, bool> p = input->ReadTagWithCutoffNoLastTag(16383u);
tag = p.first;
if (!p.second) goto handle_unusual;
// Dispatch on the field number extracted from the tag. Each case then checks
// the low byte of the tag, which also carries the wire type.
switch (::google::protobuf::internal::WireFormatLite::GetTagFieldNumber(tag)) {
// required string name = 1;
case 1: {
// 10 == (1 << 3) | 2: field 1 with wire type 2 (length-delimited).
if (static_cast< ::google::protobuf::uint8>(tag) == (10 & 0xFF)) {
DO_(::google::protobuf::internal::WireFormatLite::ReadString(
input, this->mutable_name()));
// UTF-8 verification of the string that was just read; its result is not
// wrapped in DO_, so it does not abort parsing here.
::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(
this->name().data(), static_cast<int>(this->name().length()),
::google::protobuf::internal::WireFormat::PARSE,
"tutorial.Person.name");
} else {
goto handle_unusual;
}
break;
}
// required int32 id = 2;
case 2: {
// 16 == (2 << 3) | 0: field 2 with wire type 0 (varint).
if (static_cast< ::google::protobuf::uint8>(tag) == (16 & 0xFF)) {
// The has-bit is set before the value is read into id_.
HasBitSetters::set_has_id(this);
DO_((::google::protobuf::internal::WireFormatLite::ReadPrimitive<
::google::protobuf::int32, ::google::protobuf::internal::WireFormatLite::TYPE_INT32>(
input, &id_)));
} else {
goto handle_unusual;
}
break;
}
// optional string email = 3;
case 3: {
// 26 == (3 << 3) | 2: field 3 with wire type 2 (length-delimited).
if (static_cast< ::google::protobuf::uint8>(tag) == (26 & 0xFF)) {
DO_(::google::protobuf::internal::WireFormatLite::ReadString(
input, this->mutable_email()));
::google::protobuf::internal::WireFormat::VerifyUTF8StringNamedField(
this->email().data(), static_cast<int>(this->email().length()),
::google::protobuf::internal::WireFormat::PARSE,
"tutorial.Person.email");
} else {
goto handle_unusual;
}
break;
}
// repeated .tutorial.Person.PhoneNumber phones = 4;
case 4: {
// 34 == (4 << 3) | 2: field 4 with wire type 2 (embedded message).
if (static_cast< ::google::protobuf::uint8>(tag) == (34 & 0xFF)) {
// add_phones() appends a new PhoneNumber element; ReadMessage parses the
// nested message into it. One key occurrence yields one element.
DO_(::google::protobuf::internal::WireFormatLite::ReadMessage(
input, add_phones()));
} else {
goto handle_unusual;
}
break;
}
// repeated float weight_recent_months = 100 [packed = true];
case 100: {
// 802 == (100 << 3) | 2: packed form, all floats inside one length-delimited
// payload. Only the low byte is compared (802 & 0xFF == 0x22).
if (static_cast< ::google::protobuf::uint8>(tag) == (802 & 0xFF)) {
DO_((::google::protobuf::internal::WireFormatLite::ReadPackedPrimitive<
float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
input, this->mutable_weight_recent_months())));
// 805 == (100 << 3) | 5: the non-packed form (wire type 5) is accepted too,
// even though the field is declared packed (805 & 0xFF == 0x25).
} else if (static_cast< ::google::protobuf::uint8>(tag) == (805 & 0xFF)) {
DO_((::google::protobuf::internal::WireFormatLite::ReadRepeatedPrimitiveNoInline<
float, ::google::protobuf::internal::WireFormatLite::TYPE_FLOAT>(
2, 802u, input, this->mutable_weight_recent_months())));
} else {
goto handle_unusual;
}
break;
}
default: {
// Reached for unknown field numbers, for known fields carrying an unexpected
// wire type, and when the tag read did not satisfy the cutoff check above.
handle_unusual:
// A zero tag ends parsing successfully.
if (tag == 0) {
goto success;
}
// Anything else is skipped, with the unknown-field set passed along so the
// skipped data can be retained there.
DO_(::google::protobuf::internal::WireFormat::SkipField(
input, tag, _internal_metadata_.mutable_unknown_fields()));
break;
}
}
}
success:
// @@protoc_insertion_point(parse_success:tutorial.Person)
return true;
failure:
// @@protoc_insertion_point(parse_failure:tutorial.Person)
return false;
#undef DO_
}
以上。