一、原始問題
1.執行如下代碼
import json
def test_dumps():  # Reproduces the failure: the dict below contains a bytes value.
    data={"keys":"string",1:[2,3],"dict":{"a":"b"},"key_bytes":b'123'}  # "key_bytes" maps to bytes, which the stock encoder rejects
    ans=json.dumps(data)  # raises TypeError: Object of type 'bytes' is not JSON serializable
    print(ans)  # not reached, because the call above raises

if __name__ == "__main__":
    test_dumps()
2.對於如上代碼,我們會遇到如下錯誤
Traceback (most recent call last): File "test_dumps.py", line 8, in <module> test_dumps() File "test_dumps.py", line 4, in test_dumps ans=json.dumps(data) File "/usr/lib/python3.6/json/__init__.py", line 231, in dumps return _default_encoder.encode(obj) File "/usr/lib/python3.6/json/encoder.py", line 199, in encode chunks = self.iterencode(o, _one_shot=True) File "/usr/lib/python3.6/json/encoder.py", line 257, in iterencode return _iterencode(o, 0) File "/usr/lib/python3.6/json/encoder.py", line 180, in default o.__class__.__name__) TypeError: Object of type 'bytes' is not JSON serializable
二、代碼追蹤
1.dumps函數
針對以上問題,我們一步一步看源碼,進入到json.dumps源碼,可以看到如下內容,這里刪除了源碼中的注釋。可以看到是通過JSONEncoder這個類的encode方法來編碼輸入的obj數據
def dumps(obj, *, skipkeys=False, ensure_ascii=True, check_circular=True,
        allow_nan=True, cls=None, indent=None, separators=None,
        default=None, sort_keys=False, **kw):
    """Encode ``obj`` by delegating to an encoder object's ``encode`` method.

    When every option is left at its default value, the shared
    ``_default_encoder`` is used.  Otherwise an encoder is constructed from
    ``cls`` (``JSONEncoder`` when ``cls`` is None) with all options forwarded
    as keyword arguments, and its ``encode(obj)`` result is returned.
    """
    # cached encoder
    # Fast path: no option was customised, so no new encoder is built.
    if (not skipkeys and ensure_ascii and
        check_circular and allow_nan and
        cls is None and indent is None and separators is None and
        default is None and not sort_keys and not kw):
        return _default_encoder.encode(obj)
    # Fall back to the stock encoder class when the caller supplied none.
    if cls is None:
        cls = JSONEncoder
    return cls(
        skipkeys=skipkeys, ensure_ascii=ensure_ascii,
        check_circular=check_circular, allow_nan=allow_nan, indent=indent,
        separators=separators, default=default, sort_keys=sort_keys,
        **kw).encode(obj)
2.encode函數實現
再次進入到encode中,如果數據o是字符串,則有兩種編碼方式。
一種(encode_basestring_ascii)是只使用ASCII字符來表示,這種方式會把中文等非ASCII字符自動轉義為\uXXXX形式的Unicode轉義序列。比如會把"中"轉化為"\u4e2d",在輸出的文本里這是6個ASCII字符,而非一個"中";如果其他語言沒有按JSON規則解析這段文本,就會將它看為6個字符,而非"中"
另一種(encode_basestring)不會轉義非ASCII字符,而是把它們原樣保留在輸出中;返回值仍然是str,並不是二進制數據。
在這個函數中,主要是利用self.iterencode這個方法處理數據。
def encode(self, o):
    """Return the encoded text for ``o`` as a single string.

    A ``str`` input is encoded directly by ``encode_basestring_ascii`` or
    ``encode_basestring``, chosen by ``self.ensure_ascii``.  Any other input
    is passed to ``self.iterencode`` and the resulting chunks are joined.
    """
    # This is for extremely simple cases and benchmarks.
    if isinstance(o, str):
        if self.ensure_ascii:
            return encode_basestring_ascii(o)
        else:
            return encode_basestring(o)
    chunks = self.iterencode(o, _one_shot=True)
    # iterencode may hand back a lazy iterable; materialise it before joining.
    if not isinstance(chunks, (list, tuple)):
        chunks = list(chunks)
    return ''.join(chunks)
3.核心處理函數_make_iterencode,其中包含了可擴展的_default
真正對數據進行編碼的部分如下。可以看到_iterencode通過一連串if/elif分支按類型處理數據,其中list(含tuple)和dict又各自單獨寫了一個函數處理;而如果數據不屬於任何一個分支所列的類型,就會調用_default把它轉換成可以序列化的對象,再對轉換結果遞歸編碼。
def _make_iterencode(markers, _default, _encoder, _indent, _floatstr,
        _key_separator, _item_separator, _sort_keys, _skipkeys, _one_shot,
        ## HACK: hand-optimized bytecode; turn globals into locals
        ValueError=ValueError,
        dict=dict,
        float=float,
        id=id,
        int=int,
        isinstance=isinstance,
        list=list,
        str=str,
        tuple=tuple,
        _intstr=int.__str__,
    ):
    # Builds and returns the recursive ``_iterencode`` generator function.
    # In the visible excerpt: ``markers`` (when not None) records objects
    # currently being converted so a cycle can be reported, ``_default`` is
    # the hook called for otherwise unsupported objects, and ``_encoder`` /
    # ``_floatstr`` / ``_intstr`` render str / float / int values.
    # NOTE(review): the separator, sort and skip parameters are not used in
    # the code shown here; presumably the omitted list/dict helpers consume
    # them -- confirm against the full source.

    # An integer indent is turned into that many spaces.
    if _indent is not None and not isinstance(_indent, str):
        _indent = ' ' * _indent

    def _iterencode_list(lst, _current_indent_level):
        # Only the signature is shown; the body is omitted in this excerpt.

    def _iterencode_dict(dct, _current_indent_level):
        # Only the signature is shown; the body is omitted in this excerpt.

    def _iterencode(o, _current_indent_level):
        # Dispatch on the type of ``o`` and yield the encoded text chunks.
        if isinstance(o, str):
            yield _encoder(o)
        elif o is None:
            yield 'null'
        elif o is True:
            yield 'true'
        elif o is False:
            yield 'false'
        elif isinstance(o, int):
            # see comment for int/float in _make_iterencode
            yield _intstr(o)
        elif isinstance(o, float):
            # see comment for int/float in _make_iterencode
            yield _floatstr(o)
        elif isinstance(o, (list, tuple)):
            yield from _iterencode_list(o, _current_indent_level)
        elif isinstance(o, dict):
            yield from _iterencode_dict(o, _current_indent_level)
        else:
            # No built-in branch matched: register the object so a cycle can
            # be detected, then let ``_default`` convert it.
            if markers is not None:
                markerid = id(o)
                if markerid in markers:
                    raise ValueError("Circular reference detected")
                markers[markerid] = o
            o = _default(o)
            # Encode whatever ``_default`` returned, recursively.
            yield from _iterencode(o, _current_indent_level)
            if markers is not None:
                del markers[markerid]
    return _iterencode
三、解決問題
再次回到開始的問題,我們需要重寫json.JSONEncoder中的default函數,這個default函數就是上述提到的_default函數,在default中添加處理bytes類型,修改后代碼如下。
import json

import numpy as np


class Encoder(json.JSONEncoder):
    """JSON encoder that also accepts bytes-like objects and NumPy values.

    Pass it to ``json.dumps`` via ``cls=Encoder``.  Types the stock encoder
    already understands are unaffected, because ``default`` is only consulted
    for objects the encoder cannot serialize by itself.
    """

    def default(self, obj):
        """Return a JSON-serializable replacement for ``obj``.

        Args:
            obj: An object the stock encoder could not serialize.

        Returns:
            A nested list for ``np.ndarray``, a ``str`` for ``bytes`` and
            ``bytearray`` (decoded as UTF-8), or the equivalent Python scalar
            for a NumPy scalar.

        Raises:
            TypeError: If ``obj`` is of any other type (raised by the base
                class implementation).
            UnicodeDecodeError: If a bytes-like object is not valid UTF-8.
        """
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        if isinstance(obj, (bytes, bytearray)):
            return obj.decode("utf-8")
        if isinstance(obj, np.generic):
            value = obj.item()
            # Some NumPy scalars have no native Python equivalent and come
            # back from item() still as NumPy scalars; returning those would
            # make the encoder call default() on the same value again, so
            # they fall through to the base class and raise TypeError.
            if not isinstance(value, np.generic):
                return value
        return super().default(obj)


def test_dumps():
    """Serialize a dict containing a bytes value and print the JSON text."""
    data = {"keys": "string", 1: [2, 3], "dict": {"a": "b"}, "key_bytes": b'123'}
    ans = json.dumps(data, cls=Encoder)
    print(ans)


if __name__ == "__main__":
    test_dumps()
再次運行,可以獲得如下結果,成功解決問題
{"keys": "string", "1": [2, 3], "dict": {"a": "b"}, "key_bytes": "123"}