HTTP 請求中包含 Accept-Encoding: gzip 頭信息,可以告訴服務器:如果它有任何新數據要發送給我,請以壓縮的格式發送。如果服務器支持壓縮,它將返回經 gzip 壓縮的數據,並使用 Content-Encoding: gzip 頭信息加以標記。
# coding: utf-8
import urllib2, httplib
import StringIO
import gzip
def findUrlGzip(url):
    """Fetch *url*, asking for gzip compression, and return the decoded body.

    The request carries an ``Accept-encoding: gzip`` header.  If the response
    is labelled with a gzip ``Content-Encoding`` the payload is decompressed
    before being returned; otherwise the raw payload is returned unchanged.

    Args:
        url: Address of the resource to download.

    Returns:
        The response body as a string, decompressed when the server sent it
        gzip-encoded.

    Raises:
        Whatever ``urllib2`` raises while opening the URL (for example
        ``urllib2.URLError``), and ``IOError`` from ``gzip`` if a body
        labelled as gzip is not valid gzip data.
    """
    request = urllib2.Request(url)
    request.add_header('Accept-encoding', 'gzip')
    opener = urllib2.build_opener()
    response = opener.open(request)
    try:
        # A missing header comes back as None; normalise it to an empty string.
        encoding = response.headers.get('Content-Encoding') or ''
        body = response.read()
    finally:
        # Always release the connection, even if reading fails.
        response.close()

    # Only gunzip when the server actually says the body is gzip; any other
    # Content-Encoding value must not be fed to the gzip decoder.
    if 'gzip' not in encoding.lower():
        return body

    # GzipFile needs a file-like object, so wrap the in-memory payload.
    return gzip.GzipFile(fileobj=StringIO.StringIO(body)).read()
def findUrlTitle(url, tail_len=9):
    """Return the text of the first ``<title>`` element of the page at *url*.

    The page is downloaded with ``findUrlGzip``.  Tag matching is
    case-insensitive, and the title keeps the letter case it has in the page.

    Args:
        url: Address of the HTML page to inspect.
        tail_len: Number of trailing characters to drop from the title.
            Defaults to 9, the value that used to be hard-coded here
            (presumably the length of a site-name suffix on the target
            pages -- TODO confirm).  Pass 0 to keep the whole title.

    Returns:
        The title text with the last ``tail_len`` characters removed, or an
        empty string when no ``<title>...</title>`` pair is found.
    """
    open_tag = "<title>"
    close_tag = "</title>"

    html = findUrlGzip(url)
    # Search a lower-cased copy so the tags match regardless of case, but
    # slice the original text so the title's own case is preserved.
    # NOTE(review): assumes lower() keeps the length unchanged, which holds
    # for the byte strings findUrlGzip returns -- confirm if that changes.
    lowered = html.lower()

    tag_pos = lowered.find(open_tag)
    if tag_pos == -1:
        return ""
    start = tag_pos + len(open_tag)

    # Look for the closing tag only after the opening one, so a stray
    # "</title>" earlier in the document cannot hide a valid title.
    end = lowered.find(close_tag, start)
    if end == -1:
        return ""

    title = html[start:end]
    # title[:-0] would be empty, so only trim for a positive length.
    if tail_len > 0:
        title = title[:-tail_len]
    return title
if __name__ == "__main__":
    # Manual smoke test: fetch one sample article and print its title.
    demo_url = 'http://business.sohu.com/20101010/n275509607.shtml'
    print(findUrlTitle(demo_url))