百度地圖API爬取不同類型POI的詳細數據


一、相關概念

查詢某個范圍內的所有POI

  • 參數介紹:

    • page_size:單次查詢返回的POI的數量,最大值為20
    • page_num:分頁頁碼,從0開始;查找的POI數量超過20時,會分頁顯示;比如60個POI就會分3頁;此時,page_num=0/1/2會依次顯示全部的數據;當page_num=3時,第4頁的結果集大小為0;
    • scope:1為默認值;2會顯示詳細數據
    • region:檢索的行政區域
  • URL鏈接:

    http://api.map.baidu.com/place/v2/search/?query=查詢關鍵字&page_size=20&page_num=0&output=json&bounds=40.817,111.697,40.821,111.709&scope=2&ak=你的ak

  • 查詢結果示例:

    {
        "status":0,
        "message":"ok",
        "total":2,
        "result_type":"poi_type",
        "results":[
            {
                "name":"紅螺寺",
                "location":{
                    "lat":40.390454,
                    "lng":116.632411
                },
                "address":"北京市懷柔區紅螺東路2號",
                "province":"北京市",
                "city":"北京市",
                "area":"懷柔區",
                "street_id":"",
                "telephone":"(010)60681175,(010)60681639",
                "detail":1,
                "uid":"605884e7c61e3573871541a3",
                "detail_info":{
                    "tag":"旅游景點;文物古跡",
                    "navi_location":{
                        "lng":116.63176774842,
                        "lat":40.37846005246
                    },
                    "type":"scope",
                    "detail_url":"http://api.map.baidu.com/place/detail?uid=605884e7c61e3573871541a3&output=html&source=placeapi_v2",
                    "overall_rating":"4.3",
                    "comment_num":"200",
                    "children":[
                        
                    ]
                }
            },
            {
                "name":"卧佛寺",
                "location":{
                    "lat":40.013776,
                    "lng":116.213915
                },
                "address":"北京市海淀區卧佛寺路北京植物園內",
                "province":"北京市",
                "city":"北京市",
                "area":"海淀區",
                "street_id":"934b3dbf0a8d977b8b2fb5c0",
                "detail":1,
                "uid":"934b3dbf0a8d977b8b2fb5c0",
                "detail_info":{
                    "tag":"旅游景點;文物古跡",
                    "navi_location":{
                        "lng":116.21389548337,
                        "lat":40.011540367963
                    },
                    "type":"scope",
                    "detail_url":"http://api.map.baidu.com/place/detail?uid=934b3dbf0a8d977b8b2fb5c0&output=html&source=placeapi_v2",
                    "overall_rating":"4.7",
                    "image_num":"38",
                    "comment_num":"74",
                    "children":[
                        
                    ]
                }
            }
        ]
    }
    

查詢某個POI的詳細數據

  • 參數介紹:

    • uid:某個POI對應的唯一的標識(通過范圍查詢獲取到的)
  • URL鏈接:

    http://api.map.baidu.com/place/v2/detail?uid=605884e7c61e3573871541a3&output=json&scope=2&ak=你的ak

  • 查詢結果示例:

    {
        "status":0,
        "message":"ok",
        "result":{
            "uid":"605884e7c61e3573871541a3",
            "street_id":"",
            "name":"紅螺寺",
            "location":{
                "lng":116.63241097199,
                "lat":40.390454021402
            },
            "address":"北京市懷柔區紅螺東路2號",
            "province":"北京市",
            "city":"北京市",
            "area":"懷柔區",
            "telephone":"(010)60681175,(010)60681639",
            "detail_info":{
                "tag":"旅游景點;文物古跡",
                "navi_location":{
                    "lng":116.63176778525,
                    "lat":40.378460018453
                },
                "detail_url":"http://api.map.baidu.com/place/detail?uid=605884e7c61e3573871541a3&output=html&source=placeapi_v2",
                "type":"scope",
                "price":"¥54元",
                "overall_rating":"4.3",
                "image_num":"133",
                "comment_num":"200",
                "scope_type":"古跡",
                "scope_grade":"AAAA",
                "content_tag":"適合親子;登山;禮佛祈福;賞紅葉;適合拍照;日出;適合跑步;銀杏;情侶約會;香火旺;免費項目;收費合理;空氣清新;綠植繁茂;位置優越;景色優美;人氣旺;景區大;氣勢宏大;環境不錯;玩的開心;休閑好去處;值得游玩;建築風格獨特;景點多;保存完整;停車方便;交通便利;設施新全;服務熱情;收獲頗豐;衛生干凈"
            },
            "detail":1
        }
    }
    

二、相關鏈接

三、功能模塊

  • 范圍查詢獲取POI數據

    #將查詢到的poi數據存入數據庫
    def insertPOIData(name_list,ak,cursor):
        """Search the API for every keyword and store the POIs that are new.

        For each non-empty keyword up to 10 result pages are requested, with a
        3-second pause before every request. Each returned POI is inserted
        into ``poidatas`` unless its uid is already stored. All statements run
        on the caller's ``cursor``; committing is left to the caller.

        Args:
            name_list: Iterable of search keywords (POI categories).
            ak: API access key passed through to ``getUrlByName``.
            cursor: DB-API cursor of the connection that owns the transaction.

        Returns:
            None. The number of POIs returned by the API and the number of
            rows inserted are printed at the end.
        """
        total = 0        # POIs returned by the API across all keywords/pages
        inserttotal = 0  # rows actually written (uids not stored before)
        for keyword in name_list:
            # A blank spreadsheet cell is skipped instead of aborting the
            # whole run, so keywords after a blank row are still processed.
            if keyword == '':
                continue
            for page in range(0, 10):
                time.sleep(3)
                url = getUrlByName(keyword, ak, page)
                print(url)
                # A timeout keeps one stalled request from hanging the crawl.
                data = requests.get(url, timeout=30).json()
                print(data)
                # status 0 means the request succeeded; other pages are skipped.
                if data['status'] != 0:
                    continue
                single = 'results' not in data and 'result' in data
                if single:
                    # A lone 'result' object is handled like a one-item page.
                    items = [data['result']]
                else:
                    items = data.get('results') or []
                # An empty page means the keyword has no (more) results.
                if not items:
                    break
                # Count what was actually returned; the response's 'total'
                # field repeats on every page and would be counted many times.
                total += len(items)
                for item in items:
                    if _savePOI(cursor, keyword, item):
                        inserttotal += 1
                if single:
                    break
        print('總共查找到的POI數量為 : ')
        print(total)
        print('插入數據庫的POI數量為 : ')
        print(inserttotal)


    # detail_info keys copied into the same-named poidatas columns when present.
    _POI_OPTIONAL_COLUMNS = ('overall_rating', 'distance', 'comment_num', 'price')


    def _savePOI(cursor, tag, item):
        """Insert one POI dict unless its uid is already stored.

        Args:
            cursor: DB-API cursor used for every statement.
            tag: Search keyword the POI was found under (stored in ``tag``).
            item: One POI dict taken from the API response.

        Returns:
            True when a new row was inserted, False otherwise.
        """
        uid = item['uid']
        if isExist(cursor, uid):
            print(f"{item['name']}已經存在")
            return False
        # Values are bound as parameters so quotes inside names or addresses
        # can neither break the statement nor inject SQL.
        insert = ("insert into poidatas(tag,uid,lat,lng,name,address,province,city,area) "
                  "values (%s,%s,%s,%s,%s,%s,%s,%s,%s)")
        values = (tag, uid, str(item['location']['lat']), str(item['location']['lng']),
                  item['name'], item.get('address', ''), item.get('province', ''),
                  item.get('city', ''), item.get('area', ''))
        inserted = bool(cursor.execute(insert, values))
        detail = item.get('detail_info') or {}
        present = [column for column in _POI_OPTIONAL_COLUMNS if column in detail]
        if present:
            # Column names come only from the fixed tuple above; values are bound.
            assignments = ', '.join(f'{column} = %s' for column in present)
            cursor.execute(f'update poidatas set {assignments} where uid = %s',
                           [detail[column] for column in present] + [uid])
        return inserted
    
  • 根據uid查詢POI詳細數據

    #通過uid查詢更詳細的數據並存入數據庫
    def updateDetailInfo(ak,cursor):
        """Fetch the detail record of every stored POI and save extra fields.

        Every uid in ``poidatas`` is looked up through the place-detail
        endpoint (3-second pause before each request). Whichever of
        shop_hours, detail_url, image_num, service_rating and
        environment_rating appear in the response's ``detail_info`` are
        written to the same-named columns of that row. Committing is left to
        the caller.

        Args:
            ak: API access key.
            cursor: DB-API cursor used for the select and the updates.

        Returns:
            None.
        """
        detail_columns = ('shop_hours', 'detail_url', 'image_num',
                          'service_rating', 'environment_rating')
        selectsql = 'SELECT uid FROM poidatas'
        cursor.execute(selectsql)
        result = cursor.fetchall()
        for row in result:
            uid = row[0]
            url2 = 'http://api.map.baidu.com/place/v2/detail?uid=%s&output=json&scope=2&ak=%s' %(uid,ak)
            print(url2)
            time.sleep(3)
            # A timeout keeps one stalled request from hanging the crawl.
            data = requests.get(url2, timeout=30).json()
            print(data)
            # status 0 means success; 'result' holds a single POI object.
            if data['status'] != 0 or 'result' not in data:
                continue
            detail = data['result'].get('detail_info') or {}
            present = [column for column in detail_columns if column in detail]
            if not present:
                continue
            # Column names come only from the fixed tuple above; the values
            # are bound as parameters so quotes in them cannot break the SQL.
            assignments = ', '.join(f'{column} = %s' for column in present)
            update = f'update poidatas set {assignments} where uid = %s'
            params = [detail[column] for column in present] + [uid]
            print(update, params)
            cursor.execute(update, params)
    
  • 判斷POI是否已經存入數據庫

    #判斷是否已經存入數據庫
    def isExist(cursor,uid):
        """Return True when a row with this uid is already stored in poidatas.

        Args:
            cursor: DB-API cursor; the truthiness of its ``execute`` return
                value (the matched row count with pymysql) decides the result.
            uid: Unique POI identifier to look up.

        Returns:
            True if at least one matching row exists, otherwise False.
        """
        # uid is bound as a parameter instead of being formatted into the
        # statement, so a quote inside it cannot alter the query.
        sql = "select 1 from poidatas where uid = %s limit 1"
        return bool(cursor.execute(sql, (uid,)))
    
  • 從excel表中讀取POI類別

    def readExcel(path):
        """Return the first-column value of every row on every sheet.

        Args:
            path: Location of the workbook, handed to ``xlrd.open_workbook``.

        Returns:
            A flat list of cell values, in sheet order and then row order.
        """
        workbook = xlrd.open_workbook(path)
        values = []
        for sheet in workbook.sheets():
            # Only column 0 is read; other columns are ignored.
            values.extend(sheet.cell(row_index, 0).value
                          for row_index in range(sheet.nrows))
        return values
    
  • 拼接訪問URL

    def getUrlByName(name,ak,j,bounds='40.817,111.697,40.821,111.709'):
        """Build the place-search URL for one keyword and one result page.

        The search is a rectangle (bounds) search, which yields relatively few
        POIs. An administrative-region search yields more; it would use a
        ``region=<city name>`` parameter in place of ``bounds``.

        Args:
            name: Search keyword; percent-encoded so characters such as
                ``&``, ``#`` or spaces cannot corrupt the query string.
            ak: API access key.
            j: Zero-based page number (``page_num``).
            bounds: Search rectangle as a comma-separated string of four
                coordinates; defaults to the area the original code used.

        Returns:
            The request URL as a string (20 results per page, scope=2, JSON).
        """
        from urllib.parse import quote

        return (
            'http://api.map.baidu.com/place/v2/search/'
            '?query=%s&page_size=20&page_num=%s&output=json&bounds=%s&scope=2&ak=%s'
            % (quote(str(name), safe=''), j, quote(str(bounds), safe=','),
               quote(str(ak), safe=''))
        )
    
  • Main函數

    def Main():
        """Run the crawl: read keywords, store POIs, then enrich them.

        Reads the keyword list from the Excel file, opens the MySQL
        connection, stores the search results and afterwards the per-POI
        detail fields. The cursor and the connection are always closed, even
        when one of the stages raises.
        """
        ak = "~~~~~"
        name_list=readExcel(r'D:\poi類別.xls')
        db = pymysql.connect(host="localhost", user="root", password="root", database="poi")
        try:
            cursor = db.cursor()
            try:
                insertPOIData(name_list,ak,cursor)
                # Commit the crawled rows before the detail stage so a failure
                # there does not discard everything collected so far.
                db.commit()
                updateDetailInfo(ak, cursor)
                db.commit()
            finally:
                cursor.close()
        finally:
            db.close()
    


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM