login.js文件:
/**
 * Created by resolvewang on 2017/4/15.
 */

/**
 * Build a random, upper-case, GUID-like identifier.
 *
 * Every "x" in the template becomes a random hex digit (0-f); the single "y"
 * becomes one of 8, 9, a or b. The literal "4" and the dashes are kept.
 * Kept ES5-only on purpose: this file is evaluated by an embedded JS runtime.
 *
 * @returns {string} e.g. "1A2B3C4-5D6E-4F70-8A9B-0C1D2E3F4A5B"
 */
function getGid() {
    var template = "xxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx";
    var filled = template.replace(/[xy]/g, function (placeholder) {
        var nibble = Math.floor(16 * Math.random());
        if (placeholder !== "x") {
            // Force the two high bits to "10" so the digit lands in 8..b.
            nibble = (nibble & 3) | 8;
        }
        return nibble.toString(16);
    });
    return filled.toUpperCase();
}

/**
 * Build a random JSONP callback name.
 *
 * @returns {string} "bd__cbs__" followed by a base-36 rendering of a random
 *                   integer in [0, 2^31).
 */
function getCallback() {
    var suffix = Math.floor(2147483648 * Math.random()).toString(36);
    return "bd__cbs__" + suffix;
}
Python實現代碼:
#-*- coding:utf-8 -*-
"""Log in to Baidu Pan (netdisk) and upload files through the PCS REST API.

First version of the script: module-level functions sharing one global
``requests`` session. ``login.js`` (getGid / getCallback) must sit next to
this file and a JavaScript runtime must be available to PyExecJS.
"""
__author__ = 'Administrator'

import time
import json
import re
import requests
import execjs
import base64
from urllib.parse import urlencode
from requests_toolbelt import MultipartEncoder
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from hashlib import md5
from zlib import crc32

try:
    requests.packages.urllib3.disable_warnings()
except Exception:
    # Best effort: silencing TLS warnings is cosmetic, never fatal.
    pass

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
           }

# Global session; the first GET collects the initial cookies from pan.baidu.com.
session = requests.session()
session.get('https://pan.baidu.com', headers=headers)

# Endpoint and client headers shared by every PCS file request below.
_PCS_FILE_URL = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
_PCS_HEADERS = {"Referer": "http://pan.baidu.com/disk/home",
                "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}


class BufferReader(MultipartEncoder):
    """Streaming multipart/form-data encoder that reports upload progress.

    :param fields: fields passed through to ``MultipartEncoder``.
    :param boundary: optional multipart boundary.
    :param callback: optional callable invoked after every ``read``; it
        receives ``cb_args`` and ``cb_kwargs`` plus the keyword arguments
        ``size`` (total encoded length) and ``progress`` (bytes read so far).
    """

    def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
        self._callback = callback
        self._progress = 0
        self._cb_args = cb_args
        self._cb_kwargs = cb_kwargs or {}
        super(BufferReader, self).__init__(fields, boundary)

    def read(self, size=None):
        """Read up to ``size`` bytes of the encoded body and notify the callback."""
        chunk = super(BufferReader, self).read(size)
        self._progress += len(chunk)
        self._cb_kwargs.update({
            # ``len`` computes the total lazily; ``_len`` may still be unset.
            'size': self.len,
            'progress': self._progress
        })
        if self._callback:
            try:
                self._callback(*self._cb_args, **self._cb_kwargs)
            except Exception:
                # A failing progress callback must not abort the upload.
                pass
        return chunk


def _get_runntime():
    """Compile ``login.js`` with the default PyExecJS runtime.

    Keep ``login.js`` ASCII-only; the original author observed problems when
    the JavaScript source contained Chinese text.

    :return: compiled PyExecJS context exposing ``getGid`` / ``getCallback``.
    """
    runtime = execjs.get()  # the JS runtime must be discoverable (e.g. on PATH)
    with open('login.js', 'r') as f:
        source = f.read()
    return runtime.compile(source)


def get_gid():
    """Return a fresh GUID-like ``gid`` generated by ``login.js``."""
    return _get_runntime().call('getGid')


def get_callback():
    """Return a fresh JSONP callback name generated by ``login.js``."""
    return _get_runntime().call('getCallback')


def _get_curtime():
    """Return the current time as integer milliseconds since the epoch."""
    return int(time.time() * 1000)


def _parse_jsonp(text):
    """Extract and decode the JSON payload of a ``callback(...)`` response.

    The server may quote strings with single quotes, which ``json`` rejects,
    so quotes are normalised to double quotes before decoding.

    :return: decoded object, or ``None`` if the text is not parseable JSONP.
    """
    match = re.search(r'.*?\((.*)\)', text)
    if match is None:
        return None
    try:
        return json.loads(match.group(1).replace("'", '"'))
    except ValueError:
        return None


# NOTE: "?getapi" has to directly follow https://passport.baidu.com/v2/api/
# (before the other query parameters) or the request hits the wrong route.
def get_token(gid, callback):
    """Fetch the login token for this ``gid`` / ``callback`` pair.

    :return: token string, or ``None`` on failure (a message is printed).
    """
    get_data = {
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'class': 'login',
        'gid': gid,
        'logintype': 'basicLogin',
        'callback': callback
    }
    headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive',
                        Host='passport.baidu.com'))
    resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)
    if resp.status_code == 200 and callback in resp.text:
        data = _parse_jsonp(resp.text)
        if isinstance(data, dict):
            token = (data.get('data') or {}).get('token')
            if token:
                return token
    print('獲取token失敗')
    return None


def get_rsa_key(token, gid, callback):
    """Fetch the RSA public key used to encrypt the password.

    :return: ``(pubkey, key)``; ``(None, None)`` on failure so that callers
        can always unpack the result.
    """
    get_data = {
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'gid': gid,
        'callback': callback,
    }
    resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)
    if resp.status_code == 200 and callback in resp.text:
        data = _parse_jsonp(resp.text)
        if isinstance(data, dict) and data.get('pubkey'):
            return data.get('pubkey'), data.get('key')
    print('獲取rsa key失敗')
    return None, None


def encript_password(password, pubkey):
    """Encrypt ``password`` with the PEM ``pubkey`` (RSA, PKCS#1 v1.5).

    Equivalent code with the ``rsa`` package::

        pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
        return base64.b64encode(rsa.encrypt(password.encode('utf-8'), pub)).decode('utf-8')

    :return: base64 text of the ciphertext.
    """
    # Both the key and the plaintext have to be bytes.
    pub = RSA.importKey(pubkey.encode('utf-8'))
    encryptor = PKCS1_v1_5.new(pub)
    encrypted = encryptor.encrypt(password.encode('utf-8'))
    return base64.b64encode(encrypted).decode('utf-8')


def login(token, gid, callback, rsakey, username, password):
    """Post the login form; ``password`` must already be RSA-encrypted.

    :param rsakey: the ``key`` value returned together with the public key.
    :return: ``True`` when the response contains ``err_no=0``, else ``False``.
    """
    post_data = {
        'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'charset': 'utf-8',
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'codestring': '',
        'safeflg': 0,
        'u': 'http://pan.baidu.com/disk/home',
        'isPhone': '',
        'detect': 1,
        'gid': gid,
        'quick_user': 0,
        'logintype': 'basicLogin',
        'logLoginType': 'pc_loginBasic',
        'idc': '',
        'loginmerge': 'true',
        'foreignusername': '',
        'username': username,
        'password': password,
        'mem_pass': 'on',
        'rsakey': rsakey,
        'crypttype': 12,
        'ppui_logintime': 33554,
        'countrycode': '',
        'callback': 'parent.' + callback
    }
    resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)
    succeeded = 'err_no=0' in resp.text
    print('登錄成功' if succeeded else '登錄失敗')
    return succeeded


def _pcs_api(method, dest_path, token):
    """Build the PCS file URL (with query string) for ``method`` on ``dest_path``."""
    params = {
        'method': method,
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': dest_path,
        'ondup': "newcopy"
    }
    return '%s?%s' % (_PCS_FILE_URL, urlencode(params))


def upload(dest_path, file_handle, token):
    """Upload a file as a streamed multipart/form-data request.

    :param dest_path: remote path including the file name.
    :param file_handle: file object opened in binary mode.
    :param token: bdstoken obtained from :func:`get_token`.
    :return: ``requests.Response``
    """
    files = {'file': (str(int(time.time())), file_handle)}
    body = BufferReader(files)
    header = dict(_PCS_HEADERS)
    header.update({"Content-Type": body.content_type})
    return session.post(_pcs_api('upload', dest_path, token), data=body, verify=False, headers=header)


def rapidupload(dest_path, file_handler, token):
    """Try a "rapid upload": send only checksums of the file.

    :param dest_path: remote path including the file name.
    :param file_handler: seekable file object opened in binary mode,
        e.g. ``open('file', 'rb')``.
    :param token: bdstoken obtained from :func:`get_token`.
    :return: ``requests.Response``. Responses noted by the original author:

        * content already known to the server: file metadata (path, size,
          ctime, mtime, md5, fs_id, isdir, request_id);
        * content unknown, a real upload is needed:
          ``{"errno":404,"info":[],"request_id":XXX}``;
        * file smaller than 256 KB (slice-md5 == content-md5):
          ``{"errno":2,"info":[],"request_id":XXX}``;
        * remote file already exists:
          ``{"errno":-8,"info":[],"request_id":XXX}``.
    """
    _BLOCK_SIZE = 2 ** 20  # hash the remainder in 1 MB blocks

    file_handler.seek(0, 2)
    content_length = file_handler.tell()
    file_handler.seek(0)

    # The verification slice is the first 256 KB of the file.
    first_slice = file_handler.read(256 * 1024)
    slice_md5 = md5(first_slice).hexdigest()
    content_crc32 = crc32(first_slice)
    content_md5 = md5(first_slice)

    while True:
        block = file_handler.read(_BLOCK_SIZE)
        if not block:
            break
        # Fold each further block into the running CRC32 and MD5.
        content_crc32 = crc32(block, content_crc32)
        content_md5.update(block)

    data = {
        'content-length': content_length,
        'content-md5': content_md5.hexdigest(),
        'slice-md5': slice_md5,
        'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
    }
    return session.post(_pcs_api('rapidupload', dest_path, token), data=data, verify=False,
                        headers=dict(_PCS_HEADERS))


if __name__ == '__main__':
    user = 'xxx'      # user name
    password = 'xxx'  # password

    cur_gid = get_gid()
    cur_callback = get_callback()
    cur_token = get_token(cur_gid, cur_callback)
    if cur_token is None:
        raise SystemExit(1)
    cur_pubkey, cur_key = get_rsa_key(cur_token, cur_gid, cur_callback)
    if cur_pubkey is None:
        raise SystemExit(1)
    # Do not reuse the name ``encript_password`` here: it would shadow the function.
    encrypted_password = encript_password(password, cur_pubkey)
    if not login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password):
        raise SystemExit(1)

    # Regular upload instead:
    #   with open("temp.txt", 'rb') as fh:
    #       res = upload("/hello/temp.txt", fh, cur_token)
    with open("words.txt", 'rb') as words_file:
        res = rapidupload("/hello/words.txt", words_file, cur_token)
    print(res.content.decode('utf-8'))
#-*- coding:utf-8 -*-
"""Baidu Pan client, second version: login, (rapid) upload with progress,
streaming download and metadata queries, all on one global session.

``login.js`` (getGid / getCallback) must sit next to this file and a
JavaScript runtime must be available to PyExecJS.
"""
__author__ = 'Administrator'

import time
import json
import re
import requests
import execjs
import base64
from urllib.parse import urlencode
from requests_toolbelt import MultipartEncoder
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from hashlib import md5
from zlib import crc32
import sys
from contextlib import closing
import os

try:
    requests.packages.urllib3.disable_warnings()
except Exception:
    # Best effort: silencing TLS warnings is cosmetic, never fatal.
    pass

headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                         '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
           }

# Global session; the first GET collects the initial cookies from pan.baidu.com.
session = requests.session()
session.get('https://pan.baidu.com', headers=headers)

# Client headers sent with every PCS / pan API request below.
_PCS_HEADERS = {"Referer": "http://pan.baidu.com/disk/home",
                "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}


class BufferReader(MultipartEncoder):
    """Streaming multipart/form-data encoder that reports upload progress.

    :param fields: fields passed through to ``MultipartEncoder``.
    :param boundary: optional multipart boundary.
    :param callback: optional callable invoked after every ``read``; it
        receives ``cb_args`` and ``cb_kwargs`` plus the keyword arguments
        ``size`` (total encoded length) and ``progress`` (bytes read so far).
    """

    def __init__(self, fields, boundary=None, callback=None, cb_args=(), cb_kwargs=None):
        self._callback = callback
        self._progress = 0
        self._cb_args = cb_args
        self._cb_kwargs = cb_kwargs or {}
        super(BufferReader, self).__init__(fields, boundary)

    def read(self, size=None):
        """Read up to ``size`` bytes of the encoded body and notify the callback."""
        chunk = super(BufferReader, self).read(size)
        self._progress += len(chunk)
        self._cb_kwargs.update({
            # ``len`` computes the total lazily; ``_len`` may still be unset.
            'size': self.len,
            'progress': self._progress
        })
        if self._callback:
            try:
                self._callback(*self._cb_args, **self._cb_kwargs)
            except Exception:
                # A failing progress callback must not abort the upload.
                pass
        return chunk


class ProgressBar():
    """Progress callback rendered with the optional third-party
    ``progressbar`` package (imported on first use).

    Call it with the keyword arguments ``size`` and ``progress``.
    """

    def __init__(self):
        self.first_call = True

    def __call__(self, *args, **kwargs):
        # Imported under an alias: the module-level name ``progressbar`` is
        # the plain-text progress function defined further down this file.
        import progressbar as progressbar_lib
        if self.first_call:
            self.widgets = [progressbar_lib.Percentage(), ' ',
                            progressbar_lib.Bar(marker=progressbar_lib.RotatingMarker('>')),
                            ' ', progressbar_lib.FileTransferSpeed()]
            self.pbar = progressbar_lib.ProgressBar(widgets=self.widgets, maxval=kwargs['size']).start()
            self.first_call = False

        if kwargs['size'] <= kwargs['progress']:
            self.pbar.finish()
        else:
            self.pbar.update(kwargs['progress'])


def _get_runntime():
    """Compile ``login.js`` with the default PyExecJS runtime.

    Keep ``login.js`` ASCII-only; the original author observed problems when
    the JavaScript source contained Chinese text.

    :return: compiled PyExecJS context exposing ``getGid`` / ``getCallback``.
    """
    runtime = execjs.get()  # the JS runtime must be discoverable (e.g. on PATH)
    with open('login.js', 'r') as f:
        source = f.read()
    return runtime.compile(source)


def get_gid():
    """Return a fresh GUID-like ``gid`` generated by ``login.js``."""
    return _get_runntime().call('getGid')


def get_callback():
    """Return a fresh JSONP callback name generated by ``login.js``."""
    return _get_runntime().call('getCallback')


def _get_curtime():
    """Return the current time as integer milliseconds since the epoch."""
    return int(time.time() * 1000)


def _parse_jsonp(text):
    """Extract and decode the JSON payload of a ``callback(...)`` response.

    The server may quote strings with single quotes, which ``json`` rejects,
    so quotes are normalised to double quotes before decoding.

    :return: decoded object, or ``None`` if the text is not parseable JSONP.
    """
    match = re.search(r'.*?\((.*)\)', text)
    if match is None:
        return None
    try:
        return json.loads(match.group(1).replace("'", '"'))
    except ValueError:
        return None


# NOTE: "?getapi" has to directly follow https://passport.baidu.com/v2/api/
# (before the other query parameters) or the request hits the wrong route.
def get_token(gid, callback):
    """Fetch the login token for this ``gid`` / ``callback`` pair.

    :return: token string, or ``None`` on failure (a message is printed).
    """
    get_data = {
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'class': 'login',
        'gid': gid,
        'logintype': 'basicLogin',
        'callback': callback
    }
    headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive',
                        Host='passport.baidu.com'))
    resp = session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data, headers=headers)
    if resp.status_code == 200 and callback in resp.text:
        data = _parse_jsonp(resp.text)
        if isinstance(data, dict):
            token = (data.get('data') or {}).get('token')
            if token:
                return token
    print('獲取token失敗')
    return None


def get_rsa_key(token, gid, callback):
    """Fetch the RSA public key used to encrypt the password.

    :return: ``(pubkey, key)``; ``(None, None)`` on failure so that callers
        can always unpack the result.
    """
    get_data = {
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'gid': gid,
        'callback': callback,
    }
    resp = session.get(url='https://passport.baidu.com/v2/getpublickey', headers=headers, params=get_data)
    if resp.status_code == 200 and callback in resp.text:
        data = _parse_jsonp(resp.text)
        if isinstance(data, dict) and data.get('pubkey'):
            return data.get('pubkey'), data.get('key')
    print('獲取rsa key失敗')
    return None, None


def encript_password(password, pubkey):
    """Encrypt ``password`` with the PEM ``pubkey`` (RSA, PKCS#1 v1.5).

    Equivalent code with the ``rsa`` package::

        pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
        return base64.b64encode(rsa.encrypt(password.encode('utf-8'), pub)).decode('utf-8')

    :return: base64 text of the ciphertext.
    """
    # Both the key and the plaintext have to be bytes.
    pub = RSA.importKey(pubkey.encode('utf-8'))
    encryptor = PKCS1_v1_5.new(pub)
    encrypted = encryptor.encrypt(password.encode('utf-8'))
    return base64.b64encode(encrypted).decode('utf-8')


def login(token, gid, callback, rsakey, username, password):
    """Post the login form; ``password`` must already be RSA-encrypted.

    :param rsakey: the ``key`` value returned together with the public key.
    :return: ``True`` when the response contains ``err_no=0``, else ``False``.
    """
    post_data = {
        'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
        'charset': 'utf-8',
        'token': token,
        'tpl': 'netdisk',
        'subpro': 'netdisk_web',
        'apiver': 'v3',
        'tt': _get_curtime(),
        'codestring': '',
        'safeflg': 0,
        'u': 'http://pan.baidu.com/disk/home',
        'isPhone': '',
        'detect': 1,
        'gid': gid,
        'quick_user': 0,
        'logintype': 'basicLogin',
        'logLoginType': 'pc_loginBasic',
        'idc': '',
        'loginmerge': 'true',
        'foreignusername': '',
        'username': username,
        'password': password,
        'mem_pass': 'on',
        'rsakey': rsakey,
        'crypttype': 12,
        'ppui_logintime': 33554,
        'countrycode': '',
        'callback': 'parent.' + callback
    }
    resp = session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data, headers=headers)
    succeeded = 'err_no=0' in resp.text
    print('登錄成功' if succeeded else '登錄失敗')
    return succeeded


def progressbar(size=None, progress=None, progress_title="已完成", finish_title="全部完成"):
    """Print a one-line textual progress indicator to stdout.

    :param size: total number of bytes.
    :param progress: number of bytes transferred so far.
    :param progress_title: label while the transfer is running.
    :param finish_title: label for the final "100 %" line.
    """
    done = progress or 0
    if size and done < size:
        percent = int((done / size) * 100)
        sys.stdout.write(progress_title + ": " + str(percent) + ' % ' + "\r")
        sys.stdout.flush()
    else:
        # Finished. Also reached for an empty (0-byte) transfer, where a
        # percentage cannot be computed without dividing by zero.
        sys.stdout.write(finish_title + ": " + "100" + ' % ' + "\n")


def _pcs_file_api(method, dest_path, token):
    """Build the PCS upload URL (with query string) for ``method`` on ``dest_path``."""
    params = {
        'method': method,
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': dest_path,
        'ondup': "newcopy"
    }
    url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
    return '%s?%s' % (url, urlencode(params))


def upload(dest_path, file_handle, token, callback=None):
    """Upload a file, trying a rapid upload first.

    A multipart upload is performed only when the rapid-upload response
    carries ``error_code`` 31079; otherwise the rapid-upload response is
    returned unchanged.

    :param dest_path: remote path including the file name.
    :param file_handle: seekable file object opened in binary mode.
    :param token: bdstoken obtained from :func:`get_token`.
    :param callback: optional progress callable taking the keyword arguments
        ``size`` and ``progress``; used for the multipart upload.
    :return: ``requests.Response``
    """
    res = rapidupload(dest_path, file_handle, token)
    result = json.loads(res.content.decode('utf-8'))
    if result.get("error_code", -1) != 31079:
        print("using rapidupload....")
        return res

    print("using upload....")
    # rapidupload() consumed the handle while hashing; rewind it, otherwise
    # the multipart body would be empty.
    file_handle.seek(0)
    files = {'file': (str(int(time.time())), file_handle)}
    body = BufferReader(files, callback=callback)
    header = dict(_PCS_HEADERS)
    header.update({"Content-Type": body.content_type})
    return session.post(_pcs_file_api('upload', dest_path, token), data=body, verify=False, headers=header)


def rapidupload(dest_path, file_handler, token, callback=None):
    """Try a "rapid upload": send only checksums of the file.

    :param dest_path: remote path including the file name.
    :param file_handler: seekable file object opened in binary mode,
        e.g. ``open('file', 'rb')``.
    :param token: bdstoken obtained from :func:`get_token`.
    :param callback: optional callable taking the keyword arguments ``size``
        and ``progress``; reports how many bytes have been hashed.
    :return: ``requests.Response``. Responses noted by the original author:

        * content already known to the server: file metadata (path, size,
          ctime, mtime, md5, fs_id, isdir, request_id);
        * content unknown, a real upload is needed:
          ``{"errno":404,"info":[],"request_id":XXX}``;
        * file smaller than 256 KB (slice-md5 == content-md5):
          ``{"errno":2,"info":[],"request_id":XXX}``;
        * remote file already exists:
          ``{"errno":-8,"info":[],"request_id":XXX}``.
    """
    _BLOCK_SIZE = 2 ** 20  # hash the remainder in 1 MB blocks

    file_handler.seek(0, 2)
    content_length = file_handler.tell()
    file_handler.seek(0)

    # The verification slice is the first 256 KB of the file.
    first_slice = file_handler.read(256 * 1024)
    slice_md5 = md5(first_slice).hexdigest()
    content_crc32 = crc32(first_slice)
    content_md5 = md5(first_slice)

    # Report the real number of bytes hashed, not a multiple of the block size.
    hashed = len(first_slice)
    if callback:
        callback(size=content_length, progress=hashed)
    while True:
        block = file_handler.read(_BLOCK_SIZE)
        if not block:
            break
        # Fold each further block into the running CRC32 and MD5.
        content_crc32 = crc32(block, content_crc32)
        content_md5.update(block)
        hashed += len(block)
        if callback:
            callback(size=content_length, progress=hashed)

    data = {
        'content-length': content_length,
        'content-md5': content_md5.hexdigest(),
        'slice-md5': slice_md5,
        'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
    }
    return session.post(_pcs_file_api('rapidupload', dest_path, token), data=data, verify=False,
                        headers=dict(_PCS_HEADERS))


def download(remote_path, file_path, token):
    """Stream one remote file to ``file_path``, printing progress.

    The endpoint supports standard HTTP ``Range`` headers (for example
    ``Range: bytes=0-99`` returns the first 100 bytes), so a resumable,
    piece-by-piece download is possible; this function fetches the whole
    file in one streamed request.

    :param remote_path: path of the file on the net disk, including the file
        name; must start with ``/``. Limits noted by the original author:
        at most 1000 characters; must not contain backslash, ``?``, ``|``,
        ``"``, ``>``, ``<``, ``:`` or ``*``; a file or directory name must
        not start or end with ``.`` or whitespace.
    :param file_path: local destination path (overwritten).
    :param token: bdstoken obtained from :func:`get_token`.
    :return: ``None``
    """
    params = {
        'method': 'download',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': remote_path
    }
    # pcs.baidu.com still works; d.pcs.baidu.com is the newer download host.
    url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')
    with closing(session.get(url, params=params, verify=False, headers=dict(_PCS_HEADERS),
                             stream=True)) as response:
        chunk_size = 1024  # bytes requested per iteration
        # The header is absent for chunked responses; 0 means "unknown".
        total_size = int(response.headers.get('content-length', 0))
        received = 0
        with open(file_path, 'wb') as file:
            for data in response.iter_content(chunk_size=chunk_size):
                file.write(data)
                received += len(data)
                if total_size:
                    progressbar(size=total_size, progress=received,
                                progress_title="正在下載", finish_title="下載完成")
        if not total_size:
            # Size was unknown up front: report completion once at the end.
            progressbar(size=received, progress=received,
                        progress_title="正在下載", finish_title="下載完成")


def get_filesize(rote_path, token):
    """Query the PCS ``meta`` method for one path.

    :param rote_path: remote file path, e.g. ``'/aaa.txt'``.
    :param token: bdstoken obtained from :func:`get_token`.
    :return: ``requests.Response``
    """
    params = {
        'method': 'meta',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token,
        'path': rote_path
    }
    url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
    return session.get(url, params=params, verify=False, headers=dict(_PCS_HEADERS))


def meta(file_list, token):
    """Fetch meta information for one or more remote files.

    :param file_list: list of remote paths, e.g. ``['/aaa.txt']``; a single
        path is wrapped into a list.
    :param token: bdstoken obtained from :func:`get_token`.
    :return: ``requests.Response``. Examples noted by the original author:

        * file missing:
          ``{"errno":12,"info":[{"errno":-9}],"request_id":3294861771}``
        * file present: ``{"errno": 0, "info": [{...}], "request_id": ...}``
          where each info entry carries fs_id, path, server_filename, size,
          server_mtime, server_ctime, local_mtime, local_ctime, isdir,
          category, path_md5, delete_fs_id, object_key, block_list, md5 and
          errno.
    """
    if not isinstance(file_list, list):
        file_list = [file_list]
    data = {'target': json.dumps(file_list)}
    params = {
        'method': 'filemetas',
        'app_id': "250528",
        'BDUSS': session.cookies['BDUSS'],
        't': str(int(time.time())),
        'bdstoken': token
    }
    url = 'http://pan.baidu.com/api/{0}'.format('filemetas?blocks=0&dlink=1')
    # The base URL may already carry a query string; pick the right joiner.
    joiner = '&' if '?' in url else '?'
    api = '%s%s%s' % (url, joiner, urlencode(params))
    # No debug printing here: the URL and params contain BDUSS and bdstoken.
    return session.post(api, data=data, verify=False, headers=dict(_PCS_HEADERS))


if __name__ == '__main__':
    user = 'xxx'
    password = 'xxx'

    cur_gid = get_gid()
    cur_callback = get_callback()
    cur_token = get_token(cur_gid, cur_callback)
    if cur_token is None:
        raise SystemExit(1)
    cur_pubkey, cur_key = get_rsa_key(cur_token, cur_gid, cur_callback)
    if cur_pubkey is None:
        raise SystemExit(1)
    # Do not reuse the name ``encript_password`` here: it would shadow the function.
    encrypted_password = encript_password(password, cur_pubkey)
    if not login(cur_token, cur_gid, cur_callback, cur_key, user, encrypted_password):
        raise SystemExit(1)

    with open("test_BaiduPan.py", 'rb') as source_file:
        res = upload("/hello/word.py", source_file, cur_token, callback=progressbar)
    print(res.content.decode('utf-8'))

    # Other calls:
    #   rapidupload("/hello/traindata.js", fh, cur_token, callback=progressbar)
    #   download("/hello/words.txt", "word.txt", cur_token)
    #   get_filesize("/hello/words", cur_token)
    #   meta("/hello/words.txt", cur_token)
#-*- coding:utf-8 -*-
"""Baidu Pan client, class-based version.

``PCSBase`` logs in and issues authenticated requests; ``PCS`` adds upload,
rapid upload and download. ``login.js`` (getGid / getCallback) must sit next
to this file and a JavaScript runtime must be available to PyExecJS.
"""
__author__ = 'Administrator'

import time
import json
import re
import requests
import execjs
import base64
from urllib.parse import urlencode
from requests_toolbelt import MultipartEncoder
from Crypto.Cipher import PKCS1_v1_5
from Crypto.PublicKey import RSA
from hashlib import md5
from zlib import crc32
import sys
from contextlib import closing
import os
from io import BytesIO

try:
    requests.packages.urllib3.disable_warnings()
except Exception:
    # Best effort: silencing TLS warnings is cosmetic, never fatal.
    pass


def _parse_jsonp(text):
    """Extract and decode the JSON payload of a ``callback(...)`` response.

    The server may quote strings with single quotes, which ``json`` rejects,
    so quotes are normalised to double quotes before decoding.

    :return: decoded object, or ``None`` if the text is not parseable JSONP.
    """
    match = re.search(r'.*?\((.*)\)', text)
    if match is None:
        return None
    try:
        return json.loads(match.group(1).replace("'", '"'))
    except ValueError:
        return None


class BufferReader(BytesIO):
    """In-memory request body that reports how much of it has been read.

    :param filebytes: the complete, already encoded body.
    :param callback: optional callable invoked after every ``read`` with the
        positional arguments ``(total_size, bytes_read_so_far)``.
    """

    _CHUNK_SIZE = 8192  # upper bound per read, so progress advances in steps

    def __init__(self, filebytes, callback=None):
        self._callback = callback
        self._progress = 0
        self._size = len(filebytes)
        super(BufferReader, self).__init__(filebytes)

    def read(self, size=-1):
        """Return at most ``min(size, 8192)`` bytes and notify the callback.

        An unbounded request (``None`` or negative) is also capped at 8192
        bytes; callers loop until an empty result is returned.
        """
        if size is None or size < 0:
            limit = self._CHUNK_SIZE
        else:
            # Never hand back more than the caller asked for.
            limit = min(size, self._CHUNK_SIZE)
        chunk = BytesIO.read(self, limit)
        self._progress += len(chunk)
        if self._callback:
            self._callback(self._size, self._progress)
        return chunk


class PCSBase():
    """Holds the session and credentials and performs the login.

    Constructing an instance performs network requests: it fetches a token
    and logs in. Afterwards ``self.user`` holds ``'token'`` and ``'BDUSS'``
    (``None`` when the corresponding step failed).
    """

    def __init__(self, username, password):
        self.session = requests.session()
        self.headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_4) AppleWebKit/537.36 '
                                      '(KHTML, like Gecko) Chrome/57.0.2987.133 Safari/537.36',
                        }
        self.session.get('https://pan.baidu.com', headers=self.headers)
        self.username = username
        self.password = password
        self.user = {}
        self.cur_gid = self.get_gid()
        self.cur_callback = self.get_callback()
        self.cur_time = self._get_curtime()
        self._initiate()  # log in and collect the session cookies

    def _initiate(self):
        """Fetch the token, then log in."""
        self.user['token'] = self.get_token()
        self.login()

    def _get_runntime(self):
        """Compile ``login.js`` with the default PyExecJS runtime.

        Keep ``login.js`` ASCII-only; the original author observed problems
        when the JavaScript source contained Chinese text.

        :return: compiled PyExecJS context exposing ``getGid`` / ``getCallback``.
        """
        runtime = execjs.get()  # the JS runtime must be discoverable (e.g. on PATH)
        with open('login.js', 'r') as f:
            source = f.read()
        return runtime.compile(source)

    def get_gid(self):
        """Return a fresh GUID-like ``gid`` generated by ``login.js``."""
        return self._get_runntime().call('getGid')

    def get_callback(self):
        """Return a fresh JSONP callback name generated by ``login.js``."""
        return self._get_runntime().call('getCallback')

    def _get_curtime(self):
        """Return the current time as integer milliseconds since the epoch."""
        return int(time.time() * 1000)

    # NOTE: "?getapi" has to directly follow https://passport.baidu.com/v2/api/
    # (before the other query parameters) or the request hits the wrong route.
    def get_token(self):
        """Fetch the login token.

        :return: token string, or ``None`` on failure (a message is printed).
        """
        get_data = {
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': self.cur_time,
            'class': 'login',
            'gid': self.cur_gid,
            'logintype': 'basicLogin',
            'callback': self.cur_callback
        }
        self.headers.update(dict(Referer='http://pan.baidu.com/', Accept='*/*', Connection='keep-alive',
                                 Host='passport.baidu.com'))
        resp = self.session.get(url='https://passport.baidu.com/v2/api/?getapi', params=get_data,
                                headers=self.headers)
        if resp.status_code == 200 and self.cur_callback in resp.text:
            data = _parse_jsonp(resp.text)
            if isinstance(data, dict):
                token = (data.get('data') or {}).get('token')
                if token:
                    return token
        print('獲取token失敗')
        return None

    def get_rsa_key(self):
        """Fetch the RSA public key used to encrypt the password.

        :return: ``(pubkey, key)``; ``(None, None)`` on failure so that
            callers can always unpack the result.
        """
        get_data = {
            'token': self.user['token'],
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': self.cur_time,
            'gid': self.cur_gid,
            'callback': self.cur_callback
        }
        resp = self.session.get(url='https://passport.baidu.com/v2/getpublickey', headers=self.headers,
                                params=get_data)
        if resp.status_code == 200 and self.cur_callback in resp.text:
            data = _parse_jsonp(resp.text)
            if isinstance(data, dict) and data.get('pubkey'):
                return data.get('pubkey'), data.get('key')
        print('獲取rsa key失敗')
        return None, None

    def encript_password(self, pubkey):
        """Encrypt ``self.password`` with the PEM ``pubkey`` (RSA, PKCS#1 v1.5).

        Equivalent code with the ``rsa`` package::

            pub = rsa.PublicKey.load_pkcs1_openssl_pem(pubkey.encode('utf-8'))
            return base64.b64encode(rsa.encrypt(password.encode('utf-8'), pub)).decode('utf-8')

        :return: base64 text of the ciphertext.
        """
        # Both the key and the plaintext have to be bytes.
        pub = RSA.importKey(pubkey.encode('utf-8'))
        encryptor = PKCS1_v1_5.new(pub)
        encrypted = encryptor.encrypt(self.password.encode('utf-8'))
        return base64.b64encode(encrypted).decode('utf-8')

    def login(self):
        """Log in and store the ``BDUSS`` cookie in ``self.user['BDUSS']``.

        ``self.user['BDUSS']`` is set to ``None`` when any step fails.
        """
        self.user['BDUSS'] = None
        if not self.user.get('token'):
            # get_token() already reported its own failure.
            print('登錄失敗')
            return
        cur_pubkey, cur_key = self.get_rsa_key()
        if cur_pubkey is None:
            print('登錄失敗')
            return
        encrypted_password = self.encript_password(cur_pubkey)
        post_data = {
            'staticpage': 'http://pan.baidu.com/res/static/thirdparty/pass_v3_jump.html',
            'charset': 'utf-8',
            'token': self.user['token'],
            'tpl': 'netdisk',
            'subpro': 'netdisk_web',
            'apiver': 'v3',
            'tt': self.cur_time,
            'codestring': '',
            'safeflg': 0,
            'u': 'http://pan.baidu.com/disk/home',
            'isPhone': '',
            'detect': 1,
            'gid': self.cur_gid,
            'quick_user': 0,
            'logintype': 'basicLogin',
            'logLoginType': 'pc_loginBasic',
            'idc': '',
            'loginmerge': 'true',
            'foreignusername': '',
            'username': self.username,
            'password': encrypted_password,
            'mem_pass': 'on',
            # the ``key`` value returned together with the public key
            'rsakey': cur_key,
            'crypttype': 12,
            'ppui_logintime': 33554,
            'countrycode': '',
            'callback': 'parent.' + self.cur_callback
        }
        resp = self.session.post(url='https://passport.baidu.com/v2/api/?login', data=post_data,
                                 headers=self.headers)
        if 'err_no=0' in resp.text:
            print('登錄成功')
            # .get(): a missing cookie yields None instead of a KeyError.
            self.user['BDUSS'] = self.session.cookies.get('BDUSS')
        else:
            print('登錄失敗')

    def _request(self, url, data=None, files=None, extra_params=None, callback=None):
        """Send an authenticated request to a PCS endpoint.

        * ``data`` given: POST it as a form.
        * otherwise ``files`` given: POST it as multipart/form-data, reporting
          progress through ``callback(total_size, bytes_sent)``.
        * neither: streamed GET.

        :param files: mapping accepted by urllib3's
            ``encode_multipart_formdata``, e.g. ``{'file': (name, bytes)}``.
        :param extra_params: extra query parameters (``method``, ``path``, ...).
        :return: ``requests.Response``
        """
        params = {
            'app_id': "250528",
            'BDUSS': self.user['BDUSS'],
            't': str(int(time.time())),
            'bdstoken': self.user['token']
        }
        if extra_params:
            params.update(extra_params)
        header = {"Referer": "http://pan.baidu.com/disk/home",
                  "User-Agent": "netdisk;4.6.2.0;PC;PC-Windows;10.0.10240;WindowsBaiduYunGuanJia"}

        if not (data or files):
            return self.session.get(url, params=params, verify=False, headers=header, stream=True)

        api = '%s?%s' % (url, urlencode(params))
        if data:
            return self.session.post(api, data=data, verify=False, headers=header)

        (filedata, contenttype) = requests.packages.urllib3.filepost.encode_multipart_formdata(files)
        body = BufferReader(filedata, callback=callback)
        header.update({"Content-Type": contenttype})
        return self.session.post(api, data=body, verify=False, headers=header)


class PCS(PCSBase):
    """File operations on top of an authenticated :class:`PCSBase`."""

    def __init__(self, username, password):
        super(PCS, self).__init__(username, password)

    def upload(self, remote_path, file_handler, callback=None):
        """Upload content as multipart/form-data.

        :param remote_path: remote path including the file name.
        :param file_handler: the content as ``bytes``, or a binary file
            object (it is read from its current position to the end).
        :param callback: optional ``callback(total_size, bytes_sent)``.
        :return: ``requests.Response``
        """
        params = {
            'method': 'upload',
            'path': remote_path,
            'ondup': "newcopy"
        }
        # The multipart encoder needs bytes; accept an open file as well.
        content = file_handler.read() if hasattr(file_handler, 'read') else file_handler
        files = {'file': (str(int(time.time())), content)}
        url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        return self._request(url, files=files, extra_params=params, callback=callback)

    def rapid_upload(self, remote_path, file_handler, callback=None):
        """Try a "rapid upload": send only checksums of the file.

        :param remote_path: remote path including the file name.
        :param file_handler: seekable binary file object.
        :param callback: optional callable taking the keyword arguments
            ``size`` and ``progress``; reports how many bytes were hashed.
        :return: ``requests.Response``
        """
        params = {
            'method': "rapidupload",
            'path': remote_path,
            'ondup': "newcopy"
        }
        url = 'https://{0}/rest/2.0/pcs/file'.format('pcs.baidu.com')
        _BLOCK_SIZE = 2 ** 20  # hash the remainder in 1 MB blocks

        file_handler.seek(0, 2)
        content_length = file_handler.tell()
        file_handler.seek(0)

        # The verification slice is the first 256 KB of the file.
        first_slice = file_handler.read(256 * 1024)
        slice_md5 = md5(first_slice).hexdigest()
        content_crc32 = crc32(first_slice)
        content_md5 = md5(first_slice)

        # Report the real number of bytes hashed, not a multiple of the block size.
        hashed = len(first_slice)
        if callback:
            callback(size=content_length, progress=hashed)
        while True:
            block = file_handler.read(_BLOCK_SIZE)
            if not block:
                break
            # Fold each further block into the running CRC32 and MD5.
            content_crc32 = crc32(block, content_crc32)
            content_md5.update(block)
            hashed += len(block)
            if callback:
                callback(size=content_length, progress=hashed)

        data = {
            'content-length': content_length,
            'content-md5': content_md5.hexdigest(),
            'slice-md5': slice_md5,
            'content-crc32': '%d' % (content_crc32 & 0xFFFFFFFF)
        }
        return self._request(url, data=data, extra_params=params, callback=callback)

    def download(self, remote_path, local_path, callback=None):
        """Stream a remote file to ``local_path``, printing progress.

        :param remote_path: remote path including the file name.
        :param local_path: local destination path (overwritten).
        :param callback: accepted for symmetry with the upload methods;
            progress is always printed through :meth:`progressbar`.
        :return: ``None``
        """
        params = {
            'method': "download",
            'path': remote_path
        }
        # pcs.baidu.com still works; d.pcs.baidu.com is the newer download host.
        url = 'https://{0}/rest/2.0/pcs/file'.format('d.pcs.baidu.com')
        with closing(self._request(url, extra_params=params)) as response:
            chunk_size = 1024  # bytes requested per iteration
            # The header is absent for chunked responses; 0 means "unknown".
            total_size = int(response.headers.get('content-length', 0))
            received = 0
            with open(local_path, 'wb') as file:
                for data in response.iter_content(chunk_size=chunk_size):
                    file.write(data)
                    received += len(data)
                    if total_size:
                        self.progressbar(size=total_size, progress=received,
                                         progress_title="正在下載", finish_title="下載完成")
            if not total_size:
                # Size was unknown up front: report completion once at the end.
                self.progressbar(size=received, progress=received,
                                 progress_title="正在下載", finish_title="下載完成")

    def progressbar(self, size=None, progress=None, progress_title="正在上傳", finish_title="上傳完成"):
        """Print a one-line textual progress indicator to stdout.

        :param size: total number of bytes.
        :param progress: number of bytes transferred so far.
        :param progress_title: label while the transfer is running.
        :param finish_title: label for the final "100 %" line.
        """
        done = progress or 0
        if size and done < size:
            percent = int((done / size) * 100)
            sys.stdout.write(progress_title + ": " + str(percent) + ' % ' + "\r")
            sys.stdout.flush()
        else:
            # Finished. Also reached for an empty (0-byte) transfer, where a
            # percentage cannot be computed without dividing by zero.
            sys.stdout.write(finish_title + ": " + "100" + ' % ' + "\n")


if __name__ == '__main__':
    username = "xxx"
    password = "xxx"
    pcs = PCS(username, password)
    with open("login.js", 'rb') as js_file:
        res = pcs.upload("/hello/word.js", js_file.read(), callback=pcs.progressbar)
        print(res.content.decode('utf-8'))
        # rapid_upload() rewinds the handle itself before hashing.
        res = pcs.rapid_upload("/hello/word.js", js_file, callback=pcs.progressbar)
        print(res.content.decode('utf-8'))
    pcs.download("/hello/word.js", "temp.js")