Coursera課程《Using Python to Access Web Data》 密歇根大學
Week6 JSON and the REST Architecture
13.5 JavaScript Object Notation(JSON)
JSON是一種比XML更簡單的格式,而且現在在互聯網上非常普遍。XML雖然很強大,但很多時候我們並不需要這麼強大的格式就能完成任務。
import json
# Sample JSON document: a top-level object holding one string value and
# two nested objects ("phone" and "email").
data = '''{
"name": "Chuck",
"phone": {
"type": "intl",
"number": "+1 734 303 4456"
},
"email": {
"hide": "yes"
}
}'''

# json.loads converts the JSON text into nested Python dictionaries.
info = json.loads(data)

# Nested values are reached by chaining the keys.
print(f'Name: {info["name"]}')
print(f'Hide: {info["email"]["hide"]}')
JSON用list與dictionary組合而成的結構來表示數據。
13.6 Service Oriented
有一些小應用需要使用其他網站提供的服務時,就需要按照這些網站發布的“規則”來獲取服務,這種規則我們叫做API(Application Programming Interface)。

13.7 Using Application Programming Interfaces
以下是使用Google的geocoding API的代碼。
import urllib.request, urllib.parse, urllib.error
import json
# Note that Google is increasingly requiring keys
# for this API
serviceurl = 'http://maps.googleapis.com/maps/api/geocode/json?'

# Interactive loop: geocode each location the user types; an empty line exits.
while True:
    address = input('Enter location: ')
    if not address:
        break

    # urlencode escapes spaces and punctuation so the address is URL-safe.
    url = serviceurl + urllib.parse.urlencode({'address': address})

    print('Retrieving', url)
    # The with-block closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as uh:
        data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    # Only a malformed body counts as "no data"; any other error propagates
    # instead of being silently swallowed.
    try:
        js = json.loads(data)
    except json.JSONDecodeError:
        js = None

    # Anything that is not a JSON object with status "OK" is a failure;
    # the isinstance check also covers bodies such as a bare number.
    if not isinstance(js, dict) or js.get('status') != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # json.dumps with indent pretty-prints the parsed response.
    print(json.dumps(js, indent=4))

    # Only the first entry of "results" is reported.
    first = js["results"][0]
    lat = first["geometry"]["location"]["lat"]
    lng = first["geometry"]["location"]["lng"]
    print('lat', lat, 'lng', lng)
    location = first['formatted_address']
    print(location)
使用這個API,Google可以給我們返回我們輸入的地點的經緯度之類的信息。
需要說明的是,json.dumps()用於將dict類型的數據轉成str,因為如果直接將dict類型的數據寫入json文件中會發生報錯,因此在將數據寫入時需要用到該函數。而它帶的那個參數indent可以使json顯示為樹形結構,更加方便閱讀。
13.8 Securing API Requests
這裡我們講的Twitter API和之前的Google Maps API不同,它需要我們先去註冊,獲得他們的API Key。
import urllib.request, urllib.parse, urllib.error
import twurl
import json
import ssl
# https://apps.twitter.com/
# Create App and get the four strings, put them in hidden.py
TWITTER_URL = 'https://api.twitter.com/1.1/friends/list.json'

# Ignore SSL certificate errors
# NOTE(review): this turns off certificate and hostname verification, which
# leaves the connection open to interception; keep it out of non-course code.
ctx = ssl.create_default_context()
ctx.check_hostname = False
ctx.verify_mode = ssl.CERT_NONE

# Interactive loop: list up to five friends of each account the user types;
# an empty line exits.
while True:
    print('')
    acct = input('Enter Twitter Account:')
    if not acct:
        break

    # twurl.augment returns the request URL with the OAuth parameters added.
    url = twurl.augment(TWITTER_URL,
                        {'screen_name': acct, 'count': '5'})
    print('Retrieving', url)

    # Read the body and the headers inside the with-block so the connection
    # is closed as soon as both are captured.
    with urllib.request.urlopen(url, context=ctx) as connection:
        data = connection.read().decode()
        headers = dict(connection.getheaders())

    js = json.loads(data)
    print(json.dumps(js, indent=2))

    # The remaining request quota is read from a response header.
    print('Remaining', headers['x-rate-limit-remaining'])

    for u in js['users']:
        print(u['screen_name'])
        if 'status' not in u:
            print('   * No status found')
            continue
        s = u['status']['text']
        # Show only the first 50 characters of the status text.
        print('  ', s[:50])
注意,headers = dict(connection.getheaders())這行代碼是來獲取headers的。
而print('Remaining', headers['x-rate-limit-remaining'])是獲取限速的剩餘次數的,這個值寫在回應的header里。(這個header是由API自行定義的,並不是HTTP標準的一部分;其他API也可能提供類似的header,但名稱不一定相同。)
而我們要在Twitter的網頁上獲取下面代碼里的這些東西。不然,我們是沒法訪問Twitter的服務的。
def oauth():
    """Return the four OAuth credential strings as a dictionary.

    The keys are "consumer_key", "consumer_secret", "token_key" and
    "token_secret". The values shown contain "..." and appear to be elided
    placeholders; replace them with the strings issued for your own app.
    """
    # NOTE(review): real credentials should not be committed to version control.
    return {
        "consumer_key": "h7Lu...Ng",
        "consumer_secret": "dNKenAC3New...mmn7Q",
        "token_key": "10185562-eibxCp9n2...P4GEQQOSGI",
        "token_secret": "H0ycCFemmC4wyf1...qoIpBo",
    }
以下代碼使用一種叫作OAuth的協議,為訪問Twitter的URL加上簽名。
import urllib.request, urllib.parse, urllib.error
import oauth
import hidden
# https://apps.twitter.com/
# Create App and get the four strings, put them in hidden.py
def augment(url, parameters):
    """Return `url` with `parameters` and an OAuth signature added.

    Args:
        url: The endpoint URL to be requested with HTTP GET.
        parameters: Dict of query parameters to include in the request.

    Returns:
        The value of the signed request's `to_url()`.
    """
    # Credentials come from hidden.oauth().
    secrets = hidden.oauth()
    consumer = oauth.OAuthConsumer(secrets['consumer_key'],
                                   secrets['consumer_secret'])
    token = oauth.OAuthToken(secrets['token_key'], secrets['token_secret'])

    oauth_request = oauth.OAuthRequest.from_consumer_and_token(
        consumer,
        token=token,
        http_method='GET',
        http_url=url,
        parameters=parameters,
    )
    # Sign with HMAC-SHA1 using the consumer and token.
    oauth_request.sign_request(oauth.OAuthSignatureMethod_HMAC_SHA1(),
                               consumer, token)
    return oauth_request.to_url()
def test_me():
    """Request a user timeline through a signed URL and print the result.

    Builds the URL with augment() for screen name 'drchuck' and count '2',
    then prints the URL, the raw response bytes and the response headers.
    """
    print('* Calling Twitter...')
    url = augment('https://api.twitter.com/1.1/statuses/user_timeline.json',
                  {'screen_name': 'drchuck', 'count': '2'})
    print(url)

    # Capture body and headers inside the with-block so the connection is
    # closed even if reading fails.
    with urllib.request.urlopen(url) as connection:
        data = connection.read()
        headers = dict(connection.getheaders())

    print(data)
    print(headers)
作業代碼1
import urllib.request, urllib.parse, urllib.error
import json
# Ask for the URL of a JSON document, download it, and total the "count"
# values found under its "comments" list.
url = input('Enter location: ')
print('Retrieving ', url)

# The with-block closes the HTTP response once the body has been read.
with urllib.request.urlopen(url) as uh:
    data = uh.read().decode()
print('Retrieved', len(data), 'characters')

info = json.loads(data)

# Assumes every entry in info["comments"] has a numeric "count" field.
counts = [item["count"] for item in info["comments"]]
count = len(counts)
# Named `total` so the builtin sum() is not shadowed.
total = sum(counts)

print('Count: ', count)
print('Sum: ', total)
作業代碼2
import urllib.request, urllib.parse, urllib.error
import json
# Endpoint on py4e-data.dr-chuck.net; the response is handled below in the
# same way as a Google geocoding response ("status" and "results" fields).
serviceurl = 'http://py4e-data.dr-chuck.net/geojson?'

# Interactive loop: look up the place id for each location the user types;
# an empty line exits.
while True:
    address = input('Enter location: ')
    if not address:
        break

    # urlencode escapes spaces and punctuation so the address is URL-safe.
    url = serviceurl + urllib.parse.urlencode({'address': address})

    print('Retrieving', url)
    # The with-block closes the HTTP response even if reading fails.
    with urllib.request.urlopen(url) as uh:
        data = uh.read().decode()
    print('Retrieved', len(data), 'characters')

    # Only a malformed body counts as "no data"; any other error propagates
    # instead of being silently swallowed.
    try:
        js = json.loads(data)
    except json.JSONDecodeError:
        js = None

    # Anything that is not a JSON object with status "OK" is a failure;
    # the isinstance check also covers bodies such as a bare number.
    if not isinstance(js, dict) or js.get('status') != 'OK':
        print('==== Failure To Retrieve ====')
        print(data)
        continue

    # Only the first entry of "results" is reported.
    place_id = js["results"][0]["place_id"]
    print('Place id ', place_id)
