本文為學習筆記備忘。
注:本過程是根據已知的POI興趣點的名稱爬取AOI,有可能只返回POI數據。
基本思路:
1. 首先訪問 https://map.baidu.com/ 並打開瀏覽器的開發人員工具(Network 面板),然後在搜索框中輸入興趣點的名稱(例如“河南省人民醫院”)進行搜索,在請求列表中查找對應的 url。如下圖:
2. 找到對應的 url:https://map.baidu.com/?newmap=1&qt=s&da_src=searchBox.button&wd={name}&c=268,其中 {name} 為要搜索點的名稱(作為查詢參數時需進行 URL 編碼)。
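3. 最後,從搜索結果中取出 result.profile_uid,再請求 https://map.baidu.com/?newmap=1&qt=ext&uid={profile_uid}&ext_ver=new&ie=utf-8&l=11,從返回的 content.geo 字段中獲取AOI邊界點,並將坐標由百度墨卡托轉為 BD-09,再轉為 WGS-84。具體代碼如下: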
#!/usr/bin/python
# -*- coding:utf-8 -*-
"""Crawl AOI (area-of-interest) boundaries from Baidu Maps by POI name.

@author: zql

For every POI name found in the first column of an Excel sheet the script:

1. queries the Baidu Maps search endpoint and reads ``result.profile_uid``;
2. queries the detail endpoint for that uid and reads ``content.geo``;
3. converts the boundary vertices from Baidu Mercator to BD-09 and then to
   WGS-84 and appends ``<name> <lng,lat;lng,lat;...>`` to the AOI file.

Names for which no boundary is returned are appended to a second file.
"""
import json
import math
import random
import re
import socket
import sys
import time
import urllib
import warnings
from pprint import pprint
from urllib import request

# Not used by this script.  They are kept from the original listing but made
# optional: ``xdrlib`` was removed from the standard library in Python 3.13
# and ``prettytable`` is a third-party package.
try:
    import xdrlib
except ImportError:
    xdrlib = None
try:
    from prettytable import PrettyTable
except ImportError:
    PrettyTable = None

# ``requests`` and ``xlrd`` are imported lazily inside the functions that
# need them, so the coordinate helpers below work without either package.

pi = 3.1415926535897932384626

# Lower bounds of the absolute Mercator-y bands, largest first.  The last
# bound is 0, so every finite y value falls into some band.
_MC_BANDS = (1.289059486E7, 8362377.87, 5591021, 3481989.83, 1678043.12, 0)

# One coefficient row per band, in the layout consumed by ``Yr``:
# [0..1] linear terms for longitude, [2..8] polynomial terms for latitude,
# [9] the divisor that normalises the Mercator y value.
_MC2LL_COEFFS = (
    (1.410526172116255E-8, 8.98305509648872E-6, -1.9939833816331,
     200.9824383106796, -187.2403703815547, 91.6087516669843,
     -23.38765649603339, 2.57121317296198, -0.03801003308653,
     1.73379812E7),
    (-7.435856389565537E-9, 8.983055097726239E-6, -0.78625201886289,
     96.32687599759846, -1.85204757529826, -59.36935905485877,
     47.40033549296737, -16.50741931063887, 2.28786674699375,
     1.026014486E7),
    (-3.030883460898826E-8, 8.98305509983578E-6, 0.30071316287616,
     59.74293618442277, 7.357984074871, -25.38371002664745,
     13.45380521110908, -3.29883767235584, 0.32710905363475,
     6856817.37),
    (-1.981981304930552E-8, 8.983055099779535E-6, 0.03278182852591,
     40.31678527705744, 0.65659298677277, -4.44255534477492,
     0.85341911805263, 0.12923347998204, -0.04625736007561,
     4482777.06),
    (3.09191371068437E-9, 8.983055096812155E-6, 6.995724062E-5,
     23.10934304144901, -2.3663490511E-4, -0.6321817810242,
     -0.00663494467273, 0.03430082397953, -0.00466043876332,
     2555164.4),
    (2.890871144776878E-9, 8.983055095805407E-6, -3.068298E-8,
     7.47137025468032, -3.53937994E-6, -0.02145144861037,
     -1.234426596E-5, 1.0322952773E-4, -3.23890364E-6,
     826088.5),
)


def Yr(lnglat, b):
    """Apply one band's coefficients to a Baidu Mercator point, in place.

    Args:
        lnglat: Two-item list ``[x, y]``.  It is overwritten with the
            converted ``[lng, lat]`` and the same list object is returned.
        b: Ten coefficients (see ``_MC2LL_COEFFS``).  An empty value
            (``''``, ``None``, ``[]``) means "no band".

    Returns:
        ``lnglat`` after conversion, or ``None`` when ``b`` is empty.
    """
    if not b:
        return None

    # Read both inputs before writing anything back.  The previous version
    # tested the already-converted longitude when choosing the latitude
    # sign, so the latitude always followed the longitude's sign.
    x, y = lnglat[0], lnglat[1]

    lng = b[0] + b[1] * abs(x)

    ratio = abs(y / b[9])
    lat = 0.0
    power = 1.0
    for coefficient in b[2:9]:
        lat += coefficient * power
        power *= ratio

    lnglat[0] = -lng if x < 0 else lng
    lnglat[1] = -lat if y < 0 else lat
    return lnglat


def Mecator2BD09(lng, lat):
    """Convert Baidu Mercator coordinates to BD-09 longitude/latitude.

    Args:
        lng: Mercator x value.
        lat: Mercator y value.

    Returns:
        ``[lng, lat]`` in BD-09, or ``None`` if no band matches (only
        possible for a NaN ``lat``).
    """
    # Only the band lookup uses the absolute value; the signed point is
    # handed to ``Yr`` so that negative coordinates keep their sign.
    magnitude = abs(lat)
    coefficients = None
    for lower_bound, row in zip(_MC_BANDS, _MC2LL_COEFFS):
        if magnitude >= lower_bound:
            coefficients = row
            break
    return Yr([lng, lat], coefficients)


def BD092WGS84(lnglat):
    """Convert a BD-09 ``[lng, lat]`` pair to WGS-84.

    The conversion runs BD-09 -> GCJ-02 -> WGS-84.  The argument is not
    modified.

    Args:
        lnglat: Sequence whose first two items are BD-09 longitude and
            latitude.

    Returns:
        A new list ``[lng, lat]``.
    """
    x_pi = 3.14159265358979324 * 3000.0 / 180.0
    a = 6378245.0  # semi-major axis
    ee = 0.00669342162296594323  # squared eccentricity (not the flattening)

    # BD-09 -> GCJ-02
    x = lnglat[0] - 0.0065
    y = lnglat[1] - 0.006
    z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
    theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
    gcj_lng = z * math.cos(theta)
    gcj_lat = z * math.sin(theta)

    # GCJ-02 -> WGS-84: evaluate the offset at the GCJ-02 point and
    # subtract it (``2 * p - (p + d)`` is ``p - d``).
    dlat = tranlat1(gcj_lng - 105.0, gcj_lat - 35.0)
    dlng = tranlng1(gcj_lng - 105.0, gcj_lat - 35.0)
    radlat = gcj_lat / 180.0 * pi
    magic = math.sin(radlat)
    magic = 1 - ee * magic * magic
    sqrtmagic = math.sqrt(magic)
    dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
    dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
    mglat = gcj_lat + dlat
    mglng = gcj_lng + dlng
    return [gcj_lng * 2 - mglng, gcj_lat * 2 - mglat]


def tranlat1(lng, lat):
    """Return the latitude offset term used by ``BD092WGS84``.

    Args:
        lng: Longitude minus 105.
        lat: Latitude minus 35.
    """
    ret = (-100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat
           + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng)))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lat * pi) + 40.0 *
            math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
    ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
            math.sin(lat * pi / 30.0)) * 2.0 / 3.0
    return ret


def tranlng1(lng, lat):
    """Return the longitude offset term used by ``BD092WGS84``.

    Args:
        lng: Longitude minus 105.
        lat: Latitude minus 35.
    """
    ret = (300.0 + lng + 2.0 * lat + 0.1 * lng * lng
           + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)))
    ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
            math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ret += (20.0 * math.sin(lng * pi) + 40.0 *
            math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ret += (150.0 * math.sin(lng / 12.0 * pi) + 300.0 *
            math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    return ret


# NOTE(review): 'Cookie' is a browser session captured by the author
# (it includes a BDUSS login token).  It is reproduced unchanged, but it is
# almost certainly stale and credentials should not live in source code;
# replace it with your own session or load it from the environment.
# NOTE(review): 'Accept-Encoding' advertises 'br'; requests can only decode
# Brotli responses when a brotli package is installed -- confirm.
HEADERS = {
    'Accept': '*/*',
    'Accept-Encoding': 'gzip, deflate, sdch, br',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    'Cookie': 'BAIDUID=C4D08149D7EE627DC037119413418CA3:FG=1; BIDUPSID=C4D08149D7EE627DC037119413418CA3; PSTM=1540284487; pgv_pvi=9789244416; BDUSS=GF-S3Y5c1MybnhoTkhwMUxyWEhHM3ZreW1UTURiQk1TUFllMWc5V1ZWeUVHNGhkRVFBQUFBJCQAAAAAAAAAAAEAAAA~7j45ztLKx8DtuaTIyzMyMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAISOYF2EjmBdV; session_id=1567581077599; session_name=; validate=31187; MCITY=-%3A; M_LG_UID=960425535; M_LG_SALT=2cf3bbbbd3e5466b66a2529c037b982b',
    'Host': 'map.baidu.com',
    'Referer': 'https://map.baidu.com/search/%E9%83%91%E5%B7%9E%E5%B8%82%E4%BA%8C%E4%B8%83%E4%B8%87%E8%BE%BE%E4%B8%89%E5%8F%B7%E9%99%A2/@12650489.832882352,4101769.7450000006,18.35z/maptype%3DB_EARTH_MAP?querytype=s&da_src=shareurl&wd=%E9%83%91%E5%B7%9E%E5%B8%82%E4%BA%8C%E4%B8%83%E4%B8%87%E8%BE%BE%E4%B8%89%E5%8F%B7%E9%99%A2&c=268&src=0&pn=0&sug=0&l=18&b=(12650755.24571287,4101867.1117821783;12651854.04020792,4102020.4126732675)&from=webmap&biz_forward=%7B%22scaler%22:1,%22styles%22:%22pl%22%7D&device_ratio=1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}

BASE_URL = 'https://map.baidu.com/'
CITY_CODE = 268  # value of the ``c`` query parameter; presumably a city id -- confirm
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request never returns
# NOTE(review): certificate verification is off, as in the original.  Set
# this to True unless an intercepting proxy makes that impossible.
VERIFY_TLS = False

# Edit these three paths and the sheet index for each data set.  Raw strings
# keep the same values as before while avoiding the invalid ``\主`` escape.
INPUT_XLSX = r'H:\2019newstudy\Big data and population distribution\data\鄭州主城區\主城區_excel\Accessibility\kindergarten.xlsx'
AOI_OUTPUT = r'H:\2019newstudy\Big data and population distribution\data\鄭州主城區\主城區_excel\Accessibility\kindergarten_AOI.txt'
NO_AOI_OUTPUT = r'H:\2019newstudy\Big data and population distribution\data\鄭州主城區\主城區_excel\Accessibility\kindergarten_except_AOI.txt'
SHEET_INDEX = 5


def parse_geo_ring(geo):
    """Extract the Mercator vertices from a Baidu ``geo`` string.

    The third ``|``-separated field is read as ``<digits>-x1,y1,x2,y2,...;``.

    Args:
        geo: The raw ``content.geo`` value.

    Returns:
        A list of ``(x, y)`` float tuples; empty when the field is missing.

    Raises:
        ValueError: If a coordinate is not a valid number.
    """
    fields = geo.split('|')
    if len(fields) < 3:
        return []
    # Drop the trailing ';' and the leading type marker (e.g. '1-').
    body = re.sub(r'^\d+-', '', fields[2].strip().rstrip(';'))
    if not body:
        return []
    numbers = [float(token) for token in body.split(',')]
    # zip() ignores an unpaired trailing value.
    return list(zip(numbers[0::2], numbers[1::2]))


def format_ring(points):
    """Serialise ``[lng, lat]`` pairs as ``lng,lat;lng,lat;...``."""
    return ''.join('{!r},{!r};'.format(lng, lat) for lng, lat in points)


def _lookup(obj, *keys):
    """Follow ``keys`` through nested dicts; return None if any step is missing."""
    for key in keys:
        if not isinstance(obj, dict) or key not in obj:
            return None
        obj = obj[key]
    return obj


def _get_json(params):
    """GET ``BASE_URL`` with ``params``; return parsed JSON or None on failure."""
    import requests  # third-party; imported lazily (see module header)

    try:
        # Passing ``params`` lets requests URL-encode the POI name.
        response = requests.get(BASE_URL, params=params, headers=HEADERS,
                                allow_redirects=True, verify=VERIFY_TLS,
                                timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        # requests wraps socket errors, so ``socket.timeout`` never matched.
        print('失敗', exc)
        return None
    if not response.content:
        return None
    try:
        return json.loads(response.content.decode("UTF-8"))
    except ValueError as exc:  # covers bad UTF-8 and bad JSON
        print('失敗', exc)
        return None


def fetch_aoi_ring(name):
    """Look up the AOI boundary of a POI name.

    Args:
        name: POI name to search for.

    Returns:
        A list of WGS-84 ``[lng, lat]`` pairs; ``[]`` when the responses
        carry no usable boundary; ``None`` when a request failed.
    """
    search = _get_json({'newmap': 1, 'qt': 's',
                        'da_src': 'searchBox.button',
                        'wd': name, 'c': CITY_CODE})
    if search is None:
        return None
    uid = _lookup(search, 'result', 'profile_uid')
    if not uid:
        return []
    print(uid)

    detail = _get_json({'newmap': 1, 'qt': 'ext', 'uid': uid,
                        'ext_ver': 'new', 'ie': 'utf-8', 'l': 11})
    if detail is None:
        return None
    geo = _lookup(detail, 'content', 'geo')
    if not isinstance(geo, str) or not geo:
        return []

    try:
        vertices = parse_geo_ring(geo)
    except ValueError as exc:
        print('失敗', exc)
        return []

    ring = []
    for x, y in vertices:
        print('各點的坐標', x, y)
        converted = Mecator2BD09(x, y)
        if converted is None:  # not a finite coordinate
            continue
        ring.append(BD092WGS84(converted))
    print(ring)
    return ring


def main():
    """Read the POI names from the workbook and append results to the output files."""
    # NOTE(review): xlrd 2.0 and later read only .xls files; opening this
    # .xlsx workbook presumably needs xlrd < 2.0 -- confirm.
    import xlrd  # third-party; imported lazily (see module header)

    workbook = xlrd.open_workbook(INPUT_XLSX)
    sheet = workbook.sheet_by_index(SHEET_INDEX)

    # Silence only the warning caused by ``verify=False`` instead of every
    # warning in the process.
    warnings.filterwarnings("ignore", message="Unverified HTTPS request")

    # Open both files once; UTF-8 keeps the Chinese names writable whatever
    # the platform default encoding is.
    with open(AOI_OUTPUT, 'a', encoding='utf-8') as aoi_file, \
            open(NO_AOI_OUTPUT, 'a', encoding='utf-8') as no_aoi_file:
        for row in range(1, sheet.nrows):  # row 0 is the header
            name = str(sheet.cell(row, 0).value)
            print(name)
            ring = fetch_aoi_ring(name)
            if ring is None:
                continue  # request failed; already reported
            if ring:
                point_str = format_ring(ring)
                print('轉換坐標后的坐標點', point_str)
                aoi_file.write(name + ' ' + point_str + '\n')
            else:
                no_aoi_file.write(name + ' ' + '\n')


if __name__ == "__main__":
    main()
輸入的名稱數據如下:
獲得的結果: