接口文檔:http://lbs.baidu.com/index.php?title=webapi/guide/webservice-placeapi#service-page-anchor-1-3
1、示例中的數據是從鏈家拿到的北京小區信息,包含小區名和所在行政區,例如「朱雀門/西城」;使用時實例化 CheckEstateData("朱雀門", "西城") 即可
2、API中region可以多維度組合,為防止全國或同行政區有重名,例如北京有多個萬達廣場,最好加上region=市+區
3、百度API有時會把小區名莫名解析成錯誤的名稱,例如《東四西大街50號院》->《東4西大街》;這類數據不會直接更新,而是將 status 設為 -1(set status = -1),需要手動維護
# -*- coding: utf-8 -*-
import json
import sys
import time
import urllib2

# Python 2 only: switch the interpreter-wide default codec to UTF-8.
# NOTE(review): presumably needed because the script mixes byte strings and
# unicode text containing Chinese characters in "%" formatting - confirm
# before removing.
reload(sys)
sys.setdefaultencoding("utf8")

# Baidu Place search URL. Placeholders, in order: district (appended to the
# hard-coded region 北京), query (estate name), API key.
GLOBAL_URL = "http://api.map.baidu.com/place/v2/search?region=北京%s&city_limit=true&query=%s&page_size=10&output=json&ak=%s"

# Baidu API key ("ak" query parameter); empty here and has to be filled in.
GLOBAL_AK = ""

# Selects estates that have neither a location nor a status yet, 100 at a time.
GLOBAL_SQL = "SELECT `name`, `district` FROM estate WHERE location IS NULL and `status` IS NULL LIMIT 100 "

# Columns copied from an API result into the estate table; a result that
# contains all of them counts as "complete".
COLUMN_LIST = ["area", "address", "location", "province", "city", "uid"]

# Placeholders, in order: column assignments, source_name, status, result
# (raw API payload as JSON), name, district.
UPDATE_SQL = """ UPDATE estate SET %s, source_name = '%s', `status` = %s, `result` = '%s' WHERE `name` = '%s' AND `district` = '%s' """

# Meaning of the estate.status column ("update status"):
#    0  exact name match, all attributes present
#    1  exact name match, some attributes missing
#    2  exact name match, but no entry with detail=1
#    3  no exact name match; first detail=1 result, all attributes present
#    4  no exact name match; first detail=1 result, some attributes missing
#    5  no exact name match and no detail=1 result
#   -1  nothing found
class CheckEstate:
    """Placeholder for the local storage (database) layer.

    The real implementation is kept locally and is not part of this file.
    The rest of the script constructs this class with one argument (an SQL
    string, or ``None`` when it wants the rows to process) and then reads
    the ``error`` and ``r`` attributes, so the stub exposes that interface.

    Attributes:
        sql: The argument passed by the caller, stored unchanged.
        r: Rows produced by the query; always empty in this stub.
        error: Error text, or ``None`` on success. The stub always reports
            that the backend is missing, so callers fail visibly through
            their existing ``if x.error`` checks instead of silently
            pretending that data was stored.
    """

    def __init__(self, sql=None):
        self.sql = sql
        self.r = []
        # TODO: replace with the real database access.
        # NOTE(review): presumably sql=None should run the module's SELECT
        # for pending estates - confirm against the local implementation.
        self.error = "ERR: CheckEstate storage backend is not implemented"
class CheckEstateData:
    """Look up one estate through the Baidu Place API and record the outcome.

    Instantiating the class runs the whole workflow: query the API, choose
    the best matching result, build an UPDATE statement and hand it to
    ``CheckEstate``.

    Attributes:
        name: Estate name used as the search query.
        district: District used to narrow the search region.
        datas: List of result dicts returned by the API, or ``None`` when
            the request failed or the API reported an error.
        error: Error text, or ``None`` when nothing went wrong.
        msg: Success text set after the UPDATE was accepted, else ``None``.
    """

    def __init__(self, name, district):
        self.name = name
        self.district = district
        self.error = None
        self.msg = None
        self.__get_data__()
        # ``datas`` is None only when the request failed. An empty result
        # list is a valid answer and still has to be stored (status -1);
        # otherwise the row keeps a NULL status and is selected again on
        # every run.
        if self.datas is not None:
            self.do()

    @staticmethod
    def _sql_escape(value):
        """Return *value* as text that can sit between single quotes in SQL.

        Backslashes are doubled and single quotes are doubled, which is
        correct for MySQL in its default SQL mode.
        NOTE(review): string-built SQL stays fragile; switch to parameterized
        queries once CheckEstate accepts parameters.
        """
        text = "%s" % (value,)
        return text.replace("\\", "\\\\").replace("'", "''")

    def __get_data__(self):
        """Query the API and fill ``self.datas`` and ``self.error``.

        Never raises: any failure is reported through ``self.error`` and
        leaves ``self.datas`` set to ``None``.
        """
        try:
            url = GLOBAL_URL % (self.district, self.name, GLOBAL_AK)
            print("URL: %s" % url)
            response = urllib2.urlopen(url)
            try:
                payload = json.loads(response.read())
            finally:
                # Release the connection even when reading or parsing fails.
                response.close()
            if payload["status"] == 0 and payload["message"] == "ok":
                self.datas, self.error = payload["results"], None
            else:
                self.datas, self.error = None, "ERR: API return %s" % payload["message"]
        except Exception as e:
            # Best-effort boundary: one failing estate must not stop the batch.
            self.datas, self.error = None, "ERR: get data %s %s" % (self.name, str(e))

    def update(self, sql):
        """Pass *sql* to ``CheckEstate`` and record success or failure."""
        print("INFO: sql %s" % sql)
        s = CheckEstate(sql)
        if s.error:
            self.error = "ERR: UPDATE ERR, %s" % s.error
        else:
            self.msg = "INFO: %s ok" % self.name

    def check_colunm(self, data):
        """Return True when *data* contains every key listed in COLUMN_LIST."""
        return set(COLUMN_LIST).issubset(data)

    def get_info(self):
        """Choose the API result to store and the matching status code.

        Only results flagged ``detail == 1`` are considered.

        Returns:
            A ``(result, status)`` tuple:
            ``(d, 0)``      exact name match with all columns present,
            ``(d, 1)``      exact name match with columns missing,
            ``(d, 3)``      no exact match; first detailed result, complete,
            ``(d, 4)``      no exact match; first detailed result, incomplete,
            ``(None, -1)``  no detailed result at all.
        """
        detailed = [d for d in self.datas if d.get("detail") == 1]
        for d in detailed:
            if d.get("name") == self.name:
                return d, (0 if self.check_colunm(d) else 1)
        # NOTE: an exact name match without detail=1 is not reported
        # separately; it falls through to the cases below.
        if detailed:
            first = detailed[0]
            return first, (3 if self.check_colunm(first) else 4)
        return None, -1

    def do(self):
        """Build the UPDATE statement for the chosen result and execute it."""
        r, status = self.get_info()
        result = self._sql_escape(json.dumps(self.datas, ensure_ascii=False))
        name = self._sql_escape(self.name)
        district = self._sql_escape(self.district)
        if r is None:
            sql = "UPDATE estate set `status` = %s, `result` = '%s' WHERE `name` = '%s' and district = '%s'" % (status, result, name, district)
        else:
            assignments = []
            # Walk COLUMN_LIST (not the dict) so the column order is stable.
            for k in COLUMN_LIST:
                if k not in r:
                    continue
                # location is a nested object and goes into a JSON column.
                v = json.dumps(r[k]) if k == "location" else r[k]
                assignments.append("%s = '%s'" % (k, self._sql_escape(v)))
            value = ", ".join(assignments)
            sql = UPDATE_SQL % (value, self._sql_escape(r.get("name", "")), status, result, name, district)
        self.update(sql)


def get_estate_info():
    """Process every pending estate returned by the storage layer.

    Returns:
        The storage error text when the pending rows could not be loaded,
        otherwise ``None``.
    """
    c = CheckEstate(None)
    if c.error:
        print(c.error)
        return c.error
    for d in c.r:
        estate = CheckEstateData(d["name"], d["district"])
        print(estate.error if estate.error else estate.msg)
        # Pause between estates to throttle calls to the API.
        time.sleep(0.5)
    return None


if __name__ == "__main__":
    get_estate_info()
為防止鏈家和百度的小區名有差異,在存儲時將API的所有數據本地存一份
-- Estates taken from Lianjia, enriched with data from the Baidu Place API.
CREATE TABLE `estate` (
  `id` bigint(20) unsigned NOT NULL AUTO_INCREMENT,
  `name` varchar(50) NOT NULL COMMENT '小區名',                           -- estate name
  `source_name` varchar(50) DEFAULT NULL COMMENT 'baidu小區原名',         -- estate name as returned by Baidu
  `district` varchar(20) DEFAULT NULL COMMENT '區(鏈家側數據)',           -- district (Lianjia data)
  `area` varchar(20) DEFAULT NULL COMMENT '區(百度側數據)',               -- district (Baidu data)
  `street_id` varchar(50) DEFAULT NULL COMMENT '街景圖id',                -- street-view image id
  `address` varchar(100) DEFAULT NULL COMMENT '地址',                     -- address
  `location` json DEFAULT NULL COMMENT '坐標',                            -- coordinates
  `province` varchar(30) DEFAULT NULL COMMENT '省份',                     -- province
  `city` varchar(30) DEFAULT NULL COMMENT '城市',                         -- city
  `uid` varchar(100) DEFAULT NULL COMMENT 'poi的唯一標示,可用於詳情檢索', -- unique POI id, usable for detail lookups
  `status` tinyint(4) DEFAULT NULL COMMENT '更新數據狀態,具體含義看代碼', -- update status; meanings are listed in the code
  `result` json DEFAULT NULL COMMENT '接口返回的數據',                    -- raw data returned by the API
  PRIMARY KEY (`id`),
  KEY `idx_name` (`name`)
) ENGINE=InnoDB AUTO_INCREMENT=8192 DEFAULT CHARSET=utf8mb4;
