轉自: http://blog.itpub.net/29867/viewspace-716088/
(修改部分內容)
wget --restrict-file-name=ascii -m -c -nv -np -k -E -p http://www.w3school.com.cn/
wget --restrict-file-name=ascii -m -c -nv -np -k -E -p http://scrapy-chs.readthedocs.org
參數釋義如下:
--restrict-file-names=ascii(命令中寫的 --restrict-file-name 是它的可用縮寫),將文件名保存為ASCII格式。這樣能避免utf-8文件名帶來的麻煩(注:1.12版才支持ascii參數值)
-m 整站下載,mirror的縮寫,是-N -r -l inf --no-remove-listing 這幾個參數的快捷方式,具體詳閱各自的說明
-c 續傳
-nv 不顯示詳細的下載詳情
-np don’t ascend to the parent directory.即下載的Web頁面不越過後面指定的 http://www.xxx.com的範圍。當然,如果你指定的是 http://www.xxx.com/aaa,則所有的web頁面都要在 http://www.xxx.com/aaa下
-k 下載完成後,將頁面文件中的鏈接轉換為本地鏈接,便於離線瀏覽和製作chm等
-E 保存html/css文件時,使用合適的文件後綴。例如,在某些網站有些文件是服務器端動態生成的,雖然是css文件,但後綴並不是css,-E選項可以調整之
-p -np對頁面文件做了限制,如果不加-p,則html所需的媒體文件也會受限於-np,-p則會下載html/css文件所需的所有媒體文件(圖片、音頻、視頻等)
-R 拒絕下載的文件後綴列表,逗號分隔
至於下載到的文件的文件名變為了形如%A7這樣百分號加16進制數字的形式,可以用個python程序來改變文件名:
————————————————————————————————————
import os, urllib, sys, getopt
class Renamer:
    """Re-encode, and optionally URL-unquote, file and directory names in a tree.

    Every entry below ``path`` is renamed by decoding its name with
    ``input_encoding`` and encoding it again with ``output_encoding``; the
    directory ``path`` itself is not renamed.  When ``is_url`` is true,
    percent-escapes such as ``%E4%B8%AD`` are first turned back into raw bytes.

    Names are processed as bytes so the same code runs on Python 2 and on
    Python 3.  The work is best effort: an entry that cannot be converted or
    renamed is reported on stderr and skipped.
    """

    # Class-level defaults; __init__ always replaces them on the instance.
    input_encoding = ""
    output_encoding = ""
    path = ""
    is_url = False

    def __init__(self, input, output, path, is_url):
        """Store the conversion settings.

        Args:
            input: encoding the current names are written in.
            output: encoding the new names should be written in.
            path: root directory whose contents are renamed.
            is_url: if true, percent-unquote each name before re-encoding.
        """
        self.input_encoding = input
        self.output_encoding = output
        self.path = path
        self.is_url = is_url

    def start(self):
        """Rename everything below ``self.path``."""
        self.rename_dir(self.path)

    @staticmethod
    def _to_bytes(value):
        """Return *value* as bytes, encoding text with the filesystem encoding."""
        if isinstance(value, bytes):
            return value
        fsencode = getattr(os, "fsencode", None)
        if fsencode is not None:  # Python 3
            return fsencode(value)
        # Python 2 ``unicode`` value.
        return value.encode(sys.getfilesystemencoding() or "utf-8")

    def _convert(self, name):
        """Return the bytes *name* unquoted (if requested) and re-encoded."""
        if self.is_url:
            try:
                from urllib.parse import unquote_to_bytes as unquote  # Python 3
            except ImportError:
                from urllib import unquote  # Python 2: str in, str out
            name = unquote(name)
        return name.decode(self.input_encoding).encode(self.output_encoding)

    def rename(self, root, path):
        """Rename the single entry ``path`` located in directory ``root``.

        Nothing happens when the converted name equals the current one.
        Conversion errors, an unknown encoding, an already existing target
        and OS-level rename failures are reported on stderr and skipped.
        """
        try:
            parent = self._to_bytes(root)
            old = self._to_bytes(path)
            new = self._convert(old)
            if new == old:
                return
            if b"\0" in new:
                raise ValueError("converted name contains a NUL byte")
            target = os.path.join(parent, new)
            # os.rename may silently replace an existing entry on POSIX.
            if os.path.lexists(target):
                raise OSError("target name already exists")
            os.rename(os.path.join(parent, old), target)
        except (ValueError, LookupError, OSError) as err:
            # ValueError covers UnicodeError; LookupError an unknown codec.
            sys.stderr.write("skipped %r: %s\n" % (path, err))

    def rename_dir(self, path):
        """Rename all files and directories below ``path``, deepest first.

        Walking bottom-up means a directory is only renamed after its whole
        subtree has been handled, so each entry is visited exactly once and
        no path used by the walk is invalidated by an earlier rename.
        """
        for root, dirs, files in os.walk(path, topdown=False):
            for name in files:
                self.rename(root, name)
            for name in dirs:
                self.rename(root, name)
def usage():
    """Print the command-line help text to standard output."""
    # Parenthesised single-argument form: prints the same text under
    # Python 2's print statement and is valid syntax on Python 3.
    print('''This program can change encode of files or directories.
Usage: rename.py [OPTION]...
Options:
-h, --help this document.
-i, --input-encoding=ENC set original encoding, default is UTF-8.
-o, --output-encoding=ENC set output encoding, default is GBK.
-p, --path=PATH choose the path which to process.
-u, --is-url whether as a URL
''')
def main(argv):
    """Parse the command-line options and run a Renamer over the chosen path.

    Args:
        argv: the option list without the program name (e.g. sys.argv[1:]).

    Exits with status 2 after printing the usage text when an option is not
    recognised or when no path was supplied, and with status 0 after
    -h/--help.
    """
    input_encoding = "utf-8"
    output_encoding = "gbk"
    path = ""
    # NOTE(review): URL-unquoting is already on by default, so passing
    # -u/--is-url changes nothing -- confirm whether the default should be False.
    is_url = True
    try:
        opts, _ = getopt.getopt(
            argv,
            "hi:o:p:u",
            ["help", "input-encoding=", "output-encoding=", "path=", "is-url"],
        )
    except getopt.GetoptError:
        usage()
        sys.exit(2)
    for opt, arg in opts:
        if opt in ("-h", "--help"):
            usage()
            sys.exit()
        elif opt in ("-i", "--input-encoding"):
            input_encoding = arg
        elif opt in ("-o", "--output-encoding"):
            output_encoding = arg
        elif opt in ("-p", "--path"):
            path = arg
        elif opt in ("-u", "--is-url"):
            is_url = True
    if not path:
        # os.walk("") yields nothing, so without a path the run would
        # silently do nothing; report the missing option instead.
        usage()
        sys.exit(2)
    rn = Renamer(input_encoding, output_encoding, path, is_url)
    rn.start()


if __name__ == '__main__':
    main(sys.argv[1:])
————————————————————————————————————
rename.py -i utf-8 -o gbk -p <指定的下載目錄> -u
文件改名方法來自於http://blog.csdn.net/kowity/article/details/6899256