#! /usr/bin/env python3

"""Base16, Base32, Base64 (RFC 3548), Base85 and Ascii85 data encodings"""

# Modified 04-Oct-1995 by Jack Jansen to use binascii module
# Modified 30-Dec-2003 by Barry Warsaw to add full RFC 3548 support
# Modified 22-May-2007 by Guido van Rossum to use bytes everywhere

import re
import struct
import binascii

__all__ = [
    # Legacy interface exports traditional RFC 1521 Base64 encodings
    'encode', 'decode', 'encodebytes', 'decodebytes',
    # Generalized interface for other encodings
    'b64encode', 'b64decode', 'b32encode', 'b32decode',
    'b16encode', 'b16decode',
    # Base85 and Ascii85 encodings
    'b85encode', 'b85decode', 'a85encode', 'a85decode',
    # Standard Base64 encoding
    'standard_b64encode', 'standard_b64decode',
    # Some common Base64 alternatives.  As referenced by RFC 3548, see thread
    # starting at:
    #
    # http://zgp.org/pipermail/p2p-hackers/2001-September/000316.html
    'urlsafe_b64encode', 'urlsafe_b64decode',
    ]


bytes_types = (bytes, bytearray)  # Types acceptable as binary data


def _bytes_from_decode_data(s):
    """Coerce decoder input to a bytes-like object.

    A str is encoded as ASCII; bytes and bytearray objects are returned
    unchanged; any other object is copied out through memoryview().

    Raises ValueError if s is a str containing non-ASCII characters, and
    TypeError if s is neither a str nor something memoryview() can wrap.
    """
    if isinstance(s, str):
        try:
            return s.encode('ascii')
        except UnicodeEncodeError:
            raise ValueError('string argument should contain only ASCII characters')
    # isinstance() accepts a tuple of types: true for bytes or bytearray.
    if isinstance(s, bytes_types):
        return s
    try:
        # memoryview() wraps any object exposing the buffer protocol and
        # tobytes() copies its contents, e.g.
        # memoryview(b'abc').tobytes() == b'abc'.
        return memoryview(s).tobytes()
    except TypeError:
        raise TypeError("argument should be a bytes-like object or ASCII "
                        "string, not %r" % s.__class__.__name__) from None


# Base64 encoding/decoding uses binascii

def b64encode(s, altchars=None):
    """Encode a byte string using Base64.

    s is the byte string to encode.  Optional altchars must be a byte
    string of length 2 which specifies an alternative alphabet for the
    '+' and '/' characters.  This allows an application to
    e.g. generate url or filesystem safe Base64 strings.

    The encoded byte string is returned.
    """
    # b2a_base64() returns the Base64 text as ASCII bytes followed by a
    # newline; strip off the trailing newline.
    encoded = binascii.b2a_base64(s)[:-1]
    if altchars is not None:
        assert len(altchars) == 2, repr(altchars)
        # bytes.maketrans(frm, to) builds a 256-entry table mapping each byte
        # of frm to the byte at the same position in to; translate() applies
        # it, replacing '+' and '/' with the two altchars.
        return encoded.translate(bytes.maketrans(b'+/', altchars))
    return encoded


def b64decode(s, altchars=None, validate=False):
    """Decode a Base64 encoded byte string.

    s is the byte string to decode.  Optional altchars must be a
    string of length 2 which specifies the alternative alphabet used
    instead of the '+' and '/' characters.

    The decoded string is returned.  A binascii.Error is raised if s is
    incorrectly padded.

    If validate is False (the default), non-base64-alphabet characters are
    discarded prior to the padding check.  If validate is True,
    non-base64-alphabet characters in the input result in a binascii.Error.
    """
    s = _bytes_from_decode_data(s)
    if altchars is not None:
        altchars = _bytes_from_decode_data(altchars)
        assert len(altchars) == 2, repr(altchars)
        s = s.translate(bytes.maketrans(altchars, b'+/'))
    # The whole input must be alphabet characters followed by at most two
    # '=' pad characters.  fullmatch() is used rather than match() with a
    # '$' anchor because '$' also matches just before a trailing newline,
    # which would let b'YQ==\n' slip through validation.
    if validate and not re.fullmatch(b'[A-Za-z0-9+/]*={0,2}', s):
        raise binascii.Error('Non-base64 digit found')
    return binascii.a2b_base64(s)


def standard_b64encode(s):
    """Encode a byte string using the standard Base64 alphabet.

    s is the byte string to encode.  The encoded byte string is returned.
    """
    return b64encode(s)


def standard_b64decode(s):
    """Decode a byte string encoded with the standard Base64 alphabet.

    s is the byte string to decode.  The decoded byte string is
    returned.  binascii.Error is raised if the input is incorrectly
    padded.  Characters that are not in the standard alphabet are
    discarded prior to the padding check.
    """
    return b64decode(s)


# The url-safe Base64 alphabet uses '-' and '_' in place of '+' and '/'.
_urlsafe_encode_translation = bytes.maketrans(b'+/', b'-_')
_urlsafe_decode_translation = bytes.maketrans(b'-_', b'+/')


def urlsafe_b64encode(s):
    """Encode a byte string using a url-safe Base64 alphabet.

    s is the byte string to encode.  The encoded byte string is
    returned.  The alphabet uses '-' instead of '+' and '_' instead of
    '/'.
    """
    return b64encode(s).translate(_urlsafe_encode_translation)


def urlsafe_b64decode(s):
    """Decode a byte string encoded with the url-safe Base64 alphabet.

    s is the byte string to decode.  The decoded byte string is
    returned.  binascii.Error is raised if the input is incorrectly
    padded.  Characters that are not in the url-safe alphabet are
    discarded prior to the padding check.

    The alphabet uses '-' instead of '+' and '_' instead of '/'.
    """
    s = _bytes_from_decode_data(s)
    s = s.translate(_urlsafe_decode_translation)
    return b64decode(s)


# Base32 encoding/decoding must be done in Python
# The 32 characters of the Base32 alphabet; each one encodes 5 bits.
_b32alphabet = b'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'
_b32tab2 = None
_b32rev = None


def b32encode(s):
    """Encode a byte string using Base32.

    s is the byte string to encode.  The encoded byte string is returned.
    """
    global _b32tab2
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32tab2 is None:
        # Iterating over bytes yields ints; bytes((i,)) turns each one back
        # into a length-1 bytes object, e.g. bytes((66,)) == b'B'.
        b32tab = [bytes((i,)) for i in _b32alphabet]
        # 1024 two-character entries (b'AA', b'AB', ..., b'77'): one table
        # lookup encodes 10 bits at a time.
        _b32tab2 = [a + b for a in b32tab for b in b32tab]
        b32tab = None

    if not isinstance(s, bytes_types):
        s = memoryview(s).tobytes()
    leftover = len(s) % 5
    # Pad the last quantum with zero bits if necessary
    if leftover:
        # Don't use += here: on a caller-supplied bytearray it would
        # modify the caller's object in place.
        s = s + bytes(5 - leftover)
    encoded = bytearray()
    from_bytes = int.from_bytes
    b32tab2 = _b32tab2
    for i in range(0, len(s), 5):
        # Read one 5-byte quantum as a 40-bit big-endian integer, e.g.
        # int.from_bytes(b'\x00\x10', 'big') == 16.
        c = from_bytes(s[i: i + 5], 'big')
        encoded += (b32tab2[c >> 30] +           # bits 1 - 10
                    b32tab2[(c >> 20) & 0x3ff] + # bits 11 - 20
                    b32tab2[(c >> 10) & 0x3ff] + # bits 21 - 30
                    b32tab2[c & 0x3ff]           # bits 31 - 40
                   )
    # Adjust for any leftover partial quanta: overwrite the characters that
    # were produced purely from the zero padding with '='.
    #
    #   leftover bytes   data characters   '=' characters
    #         1                 2                 6
    #         2                 4                 4
    #         3                 5                 3
    #         4                 7                 1
    #
    # Example: b'abc' is 24 bits, which needs 5 characters (25 bits), so
    # the 8-character quantum ends with 3 pad characters.
    pad = (0, 6, 4, 3, 1)[leftover]
    if pad:
        encoded[-pad:] = b'=' * pad
    return bytes(encoded)


def b32decode(s, casefold=False, map01=None):
    """Decode a Base32 encoded byte string.

    s is the byte string to decode.  Optional casefold is a flag
    specifying whether a lowercase alphabet is acceptable as input.
    For security purposes, the default is False.

    RFC 3548 allows for optional mapping of the digit 0 (zero) to the
    letter O (oh), and for optional mapping of the digit 1 (one) to
    either the letter I (eye) or letter L (el).  The optional argument
    map01 when not None, specifies which letter the digit 1 should be
    mapped to (when map01 is not None, the digit 0 is always mapped to
    the letter O).  For security purposes the default is None, so that
    0 and 1 are not allowed in the input.

    The decoded byte string is returned.  binascii.Error is raised if
    the input is incorrectly padded or if there are non-alphabet
    characters present in the input.
    """
    global _b32rev
    # Delay the initialization of the table to not waste memory
    # if the function is never called
    if _b32rev is None:
        # Maps each alphabet byte value to its 5-bit value:
        # {65: 0, 66: 1, ..., 90: 25, 50: 26, ..., 55: 31}
        _b32rev = {v: k for k, v in enumerate(_b32alphabet)}
    s = _bytes_from_decode_data(s)
    if len(s) % 8:
        raise binascii.Error('Incorrect padding')
    # Handle section 2.4 zero and one mapping.  The argument map01 will be
    # either None, or the character to map the digit 1 (one) to.  It should
    # be either L (el) or I (eye).
    if map01 is not None:
        map01 = _bytes_from_decode_data(map01)
        assert len(map01) == 1, repr(map01)
        s = s.translate(bytes.maketrans(b'01', b'O' + map01))
    if casefold:
        s = s.upper()
    # Strip off pad characters from the right.  We need to count the pad
    # characters because this will tell us how many null bytes to remove from
    # the end of the decoded string.
    l = len(s)
    s = s.rstrip(b'=')
    padchars = l - len(s)
    # Now decode the full quanta
    decoded = bytearray()
    b32rev = _b32rev
    for i in range(0, len(s), 8):
        quanta = s[i: i + 8]
        acc = 0
        try:
            for c in quanta:
                acc = (acc << 5) + b32rev[c]
        except KeyError:
            raise binascii.Error('Non-base32 digit found') from None
        decoded += acc.to_bytes(5, 'big')
    # Process the last, partial quanta
    if padchars:
        # Number of real data bytes in the final quantum for each legal pad
        # count.  The count is validated before acc is touched: an
        # all-padding input never enters the loop above (acc is unbound),
        # and an over-long pad run would shift acc past 40 bits.
        keep = {1: 4, 3: 3, 4: 2, 6: 1}.get(padchars)
        if keep is None:
            raise binascii.Error('Incorrect padding')
        # Each stripped pad character stood for 5 bits; shift them back in
        # as zeros so the quantum is a full 40 bits again, then keep only
        # the bytes that carry data.
        acc <<= 5 * padchars
        decoded[-5:] = acc.to_bytes(5, 'big')[:keep]
    return bytes(decoded)