# Sample input mixing ASCII letters/digits, Chinese text, and ASCII / non-ASCII punctuation.
line = "python3.6下进行去!@#$%^&*()除标点测试,:!大家好,:!&》啥都不是!@#¥%……&*(-、||"


# Python 3 does not support the ur"" string prefix; a plain raw string r"" is used instead.
def remove_punctuation(line: str) -> str:
    """Return *line* with everything except letters, digits and Chinese characters removed.

    A character is kept only if it is an ASCII letter (a-z, A-Z), an ASCII
    digit (0-9), or lies in the range U+4E00..U+9FA5. Every other character
    is deleted, including punctuation, whitespace and underscores.

    Args:
        line: The text to clean.

    Returns:
        A new string containing only the kept characters, in their original
        order. An empty input yields an empty string.
    """
    # Imported locally so the function works even when the enclosing module
    # has not imported ``re`` at the top of the file.
    import re

    # Negated character class: match every character that is NOT kept.
    rule = re.compile(r"[^a-zA-Z0-9\u4e00-\u9fa5]")
    return rule.sub('', line)
# title = href.get('title')  # gb2312 GB18030
# dirName = title.encode("latin1").decode("gbk")  # folder name