利用名稱爬取百度AOI


本文為學習筆記備忘。

注:本過程是根據已知的POI興趣點的名稱爬取AOI,有可能只返回POI數據。

基本思路:

1.首先訪問https://map.baidu.com/並打開瀏覽器的開發人員工具,然後在搜索框中輸入興趣點的名稱(例如搜索“河南省人民醫院”),在網絡請求列表中查找對應的url。如下圖:

2.找到對應的url,為https://map.baidu.com/?newmap=1&qt=s&da_src=searchBox.button&wd='+name+'&c=268,其中name為要搜索點的名稱。

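3.最後,利用相應的url獲取AOI數據:先從搜索結果的result中取出profile_uid,再用該uid請求https://map.baidu.com/?newmap=1&qt=ext&uid=<profile_uid>&ext_ver=new&ie=utf-8&l=11,並從返回內容的geo字段解析出邊界坐標(若返回內容中沒有geo字段,則說明該名稱只有POI數據)。具體代碼如下: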

  1 '''@author: zql
  2 '''
  3 #!/usr/bin/python
  4 # -*- coding:utf-8 -*-
  5 #項目名稱:爬取百度地圖的AOI數據
  6 #詳細描述:
  7 import json
  8 
  9 import urllib
 10 from urllib import request
 11 import requests
 12 from pprint import pprint
 13 
 14 from prettytable import PrettyTable
 15 import random
 16 
 17 
 18 
 19 #from station import stations
 20 import warnings
 21 import  xdrlib ,sys
 22 import xlrd
 23 import time
 24 import socket
 25 
 26 #bd墨卡托轉BD-09
 27 import math
 28 pi = 3.1415926535897932384626
 29 def Yr(lnglat,b):
 30     if b!='':
 31         c=b[0]+b[1]*abs(lnglat[0])
 32         d=abs(lnglat[1]/b[9])
 33         d=b[2]+b[3]*d+b[4]*d*d+b[5]*d*d*d+b[6]*d*d*d*d+b[7]*d*d*d*d*d+b[8]*d*d*d*d*d*d
 34         if 0>lnglat[0]:
 35             bd=-1*c
 36         else:
 37             bd=c
 38         lnglat[0]=bd
 39         if 0 > lnglat[0]:
 40             bd2 = -1 * d
 41         else:
 42             bd2 = d
 43         lnglat[1] = bd2
 44         return lnglat
 45     return
 46 def Mecator2BD09(lng,lat):
 47     lnglat=[0,0]
 48     Au=[[1.410526172116255E-8, 8.98305509648872E-6, -1.9939833816331, 200.9824383106796, -187.2403703815547,
 49           91.6087516669843, -23.38765649603339, 2.57121317296198, -0.03801003308653, 1.73379812E7],
 50          [- 7.435856389565537E-9, 8.983055097726239E-6, -0.78625201886289, 96.32687599759846, -1.85204757529826,
 51           -59.36935905485877, 47.40033549296737, -16.50741931063887, 2.28786674699375, 1.026014486E7],
 52          [- 3.030883460898826E-8, 8.98305509983578E-6, 0.30071316287616, 59.74293618442277, 7.357984074871,
 53           -25.38371002664745, 13.45380521110908, -3.29883767235584, 0.32710905363475, 6856817.37],
 54          [- 1.981981304930552E-8, 8.983055099779535E-6, 0.03278182852591, 40.31678527705744, 0.65659298677277,
 55           -4.44255534477492, 0.85341911805263, 0.12923347998204, -0.04625736007561, 4482777.06],
 56          [3.09191371068437E-9, 8.983055096812155E-6, 6.995724062E-5, 23.10934304144901, -2.3663490511E-4,
 57           -0.6321817810242, -0.00663494467273, 0.03430082397953, -0.00466043876332, 2555164.4],
 58          [2.890871144776878E-9, 8.983055095805407E-6, -3.068298E-8, 7.47137025468032, -3.53937994E-6, -0.02145144861037,
 59           -1.234426596E-5, 1.0322952773E-4, -3.23890364E-6, 826088.5]]
 60     Sp=[1.289059486E7, 8362377.87, 5591021, 3481989.83, 1678043.12, 0 ]
 61     lnglat[0]=math.fabs(lng)
 62     lnglat[1] =abs(lat)
 63     for d in range(0,6):
 64         if lnglat[1]>=Sp[d]:
 65             c=Au[d]
 66             break
 67     lnglat=Yr(lnglat,c)
 68     return lnglat
 69 def BD092WGS84(lnglat):
 70     #bd09-gcj
 71 
 72     x_pi = 3.14159265358979324 * 3000.0 / 180.0
 73     pi = 3.1415926535897932384626  # π
 74     a = 6378245.0  # 長半軸
 75     ee = 0.00669342162296594323  # 扁率
 76     x = lnglat[0] - 0.0065
 77     y = lnglat[1] - 0.006
 78     z = math.sqrt(x * x + y * y) - 0.00002 * math.sin(y * x_pi)
 79     theta = math.atan2(y, x) - 0.000003 * math.cos(x * x_pi)
 80     lnglat[0] = z * math.cos(theta)
 81     lnglat[1] = z * math.sin(theta)
 82 
 83     dlat = tranlat1(lnglat[0] - 105.0, lnglat[1] - 35.0)
 84     dlng = tranlng1(lnglat[0] - 105.0, lnglat[1] - 35.0)
 85     radlat = lnglat[1] / 180.0 * pi
 86     magic = math.sin(radlat)
 87     magic = 1 - ee * magic * magic
 88     sqrtmagic = math.sqrt(magic)
 89     dlat = (dlat * 180.0) / ((a * (1 - ee)) / (magic * sqrtmagic) * pi)
 90     dlng = (dlng * 180.0) / (a / sqrtmagic * math.cos(radlat) * pi)
 91     mglat = lnglat[1] + dlat
 92     mglng = lnglat[0] + dlng
 93     return [lnglat[0]* 2 - mglng, lnglat[1] * 2 - mglat]
 94 def tranlat1(lng, lat):
 95     ret = -100.0 + 2.0 * lng + 3.0 * lat + 0.2 * lat * lat + 0.1 * lng * lat + 0.2 * math.sqrt(math.fabs(lng))
 96     ret += (20.0 * math.sin(6.0 * lng * pi) + 20.0 *
 97             math.sin(2.0 * lng * pi)) * 2.0 / 3.0
 98     ret += (20.0 * math.sin(lat * pi) + 40.0 *
 99             math.sin(lat / 3.0 * pi)) * 2.0 / 3.0
100     ret += (160.0 * math.sin(lat / 12.0 * pi) + 320 *
101             math.sin(lat * pi / 30.0)) * 2.0 / 3.0
102     return ret
def tranlng1(lng, lat):
    """Return the raw longitude offset term for the given relative position.

    ``BD092WGS84`` calls this with ``lng - 105.0`` and ``lat - 35.0``. The
    result is a polynomial part plus three sine-series parts in ``lng``; it
    relies on the module-level ``pi`` constant.
    """
    polynomial = (300.0 + lng + 2.0 * lat + 0.1 * lng * lng
                  + 0.1 * lng * lat + 0.1 * math.sqrt(math.fabs(lng)))
    ripple_fast = (20.0 * math.sin(6.0 * lng * pi)
                   + 20.0 * math.sin(2.0 * lng * pi)) * 2.0 / 3.0
    ripple_mid = (20.0 * math.sin(lng * pi)
                  + 40.0 * math.sin(lng / 3.0 * pi)) * 2.0 / 3.0
    ripple_slow = (150.0 * math.sin(lng / 12.0 * pi)
                   + 300.0 * math.sin(lng / 30.0 * pi)) * 2.0 / 3.0
    # Summed left to right, matching the original accumulation order.
    return polynomial + ripple_fast + ripple_mid + ripple_slow
113 
# HTTP headers sent with every request to map.baidu.com.
HEADERS = {
    'Accept': '*/*',
    # Only advertise encodings that `requests` can always decode. The
    # previous value also listed "sdch" and "br"; a Brotli-compressed reply
    # is left undecoded unless an optional Brotli package is installed,
    # which then breaks the later `.decode("UTF-8")` of the body.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.8',
    'Connection': 'keep-alive',
    # SECURITY NOTE(review): this is a hard-coded browser session cookie
    # (it includes a BDUSS login token). It is kept unchanged so the
    # requests behave as before, but it should be replaced with your own
    # cookie loaded from configuration and must not be published.
    'Cookie': 'BAIDUID=C4D08149D7EE627DC037119413418CA3:FG=1; BIDUPSID=C4D08149D7EE627DC037119413418CA3; PSTM=1540284487; pgv_pvi=9789244416; BDUSS=GF-S3Y5c1MybnhoTkhwMUxyWEhHM3ZreW1UTURiQk1TUFllMWc5V1ZWeUVHNGhkRVFBQUFBJCQAAAAAAAAAAAEAAAA~7j45ztLKx8DtuaTIyzMyMQAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAISOYF2EjmBdV; session_id=1567581077599; session_name=; validate=31187; MCITY=-%3A; M_LG_UID=960425535; M_LG_SALT=2cf3bbbbd3e5466b66a2529c037b982b',
    'Host': 'map.baidu.com',
    'Referer': 'https://map.baidu.com/search/%E9%83%91%E5%B7%9E%E5%B8%82%E4%BA%8C%E4%B8%83%E4%B8%87%E8%BE%BE%E4%B8%89%E5%8F%B7%E9%99%A2/@12650489.832882352,4101769.7450000006,18.35z/maptype%3DB_EARTH_MAP?querytype=s&da_src=shareurl&wd=%E9%83%91%E5%B7%9E%E5%B8%82%E4%BA%8C%E4%B8%83%E4%B8%87%E8%BE%BE%E4%B8%89%E5%8F%B7%E9%99%A2&c=268&src=0&pn=0&sug=0&l=18&b=(12650755.24571287,4101867.1117821783;12651854.04020792,4102020.4126732675)&from=webmap&biz_forward=%7B%22scaler%22:1,%22styles%22:%22pl%22%7D&device_ratio=1',
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/55.0.2883.87 Safari/537.36',
}
123 
124 
125 
126 #name_data = xlrd.open_workbook('H:\\2019newstudy\\Big data and population distribution\\小論文_可達性研究\\lunwen_data\\鄭州金水區醫療poi.xlsx')
# ---------------------------------------------------------------------------
# Batch job: read POI names from a spreadsheet, look each one up on Baidu Map
# and append its AOI outline (converted to WGS-84) to a text file. Names for
# which no outline is available are appended to a second text file.
# ---------------------------------------------------------------------------

# Raw strings: the previous literals mixed "\\" with a bare "\" in front of a
# non-ASCII character, which is an invalid escape sequence. The resulting
# path values are unchanged.
# NOTE(review): xlrd 2.0 and later only read .xls files; opening this .xlsx
# workbook needs xlrd < 2.0 (or a different reader).
WORKBOOK_PATH = r'H:\2019newstudy\Big data and population distribution\data\鄭州主城區\主城區_excel\Accessibility\kindergarten.xlsx'  # edit per run
AOI_OUTPUT_PATH = r'H:\2019newstudy\Big data and population distribution\data\鄭州主城區\主城區_excel\Accessibility\kindergarten_AOI.txt'  # edit per run
NO_AOI_OUTPUT_PATH = r'H:\2019newstudy\Big data and population distribution\data\鄭州主城區\主城區_excel\Accessibility\kindergarten_except_AOI.txt'  # edit per run
SHEET_INDEX = 5  # edit per run
BAIDU_MAP_URL = 'https://map.baidu.com/'
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled request blocks forever


def _fetch_json(params):
    """GET the Baidu Map endpoint with *params* and return the decoded JSON.

    Raises requests.RequestException on network problems and ValueError on
    an empty, non-UTF-8 or non-JSON body.
    """
    # Passing the query via `params` lets requests URL-encode the values, so
    # names containing '&', '#' or spaces no longer corrupt the query string.
    # SECURITY NOTE(review): verify=False disables TLS certificate checks; it
    # is kept because the original script relied on it.
    response = requests.get(BAIDU_MAP_URL, params=params, headers=HEADERS,
                            allow_redirects=True, verify=False,
                            timeout=REQUEST_TIMEOUT)
    if not response.content:
        raise ValueError('empty response body')
    return json.loads(response.content.decode("UTF-8"))


def _parse_outline(geo):
    """Return the (x, y) float pairs found in the third '|' field of *geo*.

    Returns an empty list when the field is missing or not numeric.
    """
    fields = geo.split('|')
    if len(fields) < 3:
        return []
    # As in the original: drop the two-character marker in front of the first
    # x value ("1-") and the ';' that terminates the last y value.
    values = fields[2][2:].rstrip(';').split(',')
    paired = values[:2 * (len(values) // 2)]
    try:
        numbers = [float(value) for value in paired]
    except ValueError:
        return []
    return list(zip(numbers[0::2], numbers[1::2]))


def _fetch_outline(name):
    """Return the converted outline points for *name*, or [] when it has none."""
    search = _fetch_json({'newmap': 1, 'qt': 's', 'da_src': 'searchBox.button',
                          'wd': name, 'c': 268})
    result = search.get('result') if isinstance(search, dict) else None
    uid = result.get('profile_uid') if isinstance(result, dict) else None
    if not uid:
        return []
    print(uid)

    detail = _fetch_json({'newmap': 1, 'qt': 'ext', 'uid': uid,
                          'ext_ver': 'new', 'ie': 'utf-8', 'l': 11})
    content = detail.get('content') if isinstance(detail, dict) else None
    geo = content.get('geo') if isinstance(content, dict) else None
    if not isinstance(geo, str):
        return []
    return [BD092WGS84(Mecator2BD09(x, y)) for x, y in _parse_outline(geo)]


def _format_points(points):
    """Serialise points as 'lng,lat;lng,lat;' (same text the original wrote)."""
    return ''.join('%r,%r;' % (point[0], point[1]) for point in points)


def main():
    """Process every data row of the workbook and append results to the output files."""
    # Silences every warning, including the InsecureRequestWarning caused by
    # verify=False; done once instead of on every loop iteration.
    warnings.filterwarnings("ignore")

    table = xlrd.open_workbook(WORKBOOK_PATH).sheet_by_index(SHEET_INDEX)

    # Explicit UTF-8 so Chinese names are written the same way on every
    # platform instead of depending on the locale's default encoding.
    with open(AOI_OUTPUT_PATH, 'a', encoding='utf-8') as aoi_file, \
            open(NO_AOI_OUTPUT_PATH, 'a', encoding='utf-8') as no_aoi_file:
        # Row 0 is the header, so data starts at row 1.
        for row in range(1, table.nrows):
            name = str(table.cell(row, 0).value)
            print(name)
            try:
                outline = _fetch_outline(name)
            except (requests.RequestException, ValueError):
                # requests raises its own exception types (not socket.timeout);
                # report the failure and carry on with the next name.
                print('失敗')
                continue

            if outline:
                point_str = _format_points(outline)
                print('轉換坐標后的坐標點', point_str)
                aoi_file.write(name + ' ' + point_str + '\n')
            else:
                no_aoi_file.write(name + ' ' + '\n')


# Running the file as a script behaves as before; the guard only prevents the
# whole scrape from starting when the module is merely imported.
if __name__ == '__main__':
    main()

輸入的名稱數據如下:

獲得的結果:


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM