1. Converting a list into a dictionary
# Inside a data-loading class: read the category names from cat.txt (one per line)
# and map each category to an integer id. Assumes `import os` and that
# self.raw_data points to the raw data directory.
self.cat_list = []
with open(os.path.join(self.raw_data, "cat.txt")) as f:
    for line in f.readlines():
        self.cat_list.append(line.strip())
# zip the category names with 0..N-1 to build the label dictionary
self.label_dict = dict(zip(self.cat_list, range(len(self.cat_list))))
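The same zip pattern works outside a class as well. A minimal standalone sketch (the category names below are made up for illustration, not from the original data):

# Standalone sketch of the list-to-dict pattern above.
# The category names here are hypothetical examples.
cat_list = ["sports", "finance", "technology"]
label_dict = dict(zip(cat_list, range(len(cat_list))))
print(label_dict)  # {'sports': 0, 'finance': 1, 'technology': 2}

# The reverse mapping (id -> category) is often handy at prediction time:
id_to_cat = {idx: cat for cat, idx in label_dict.items()}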
2. Building a vocabulary (dictionary) for NLP
import re
from collections import Counter


def remove_1a(content):
    """Keep only Chinese characters, letters and digits; strip punctuation and whitespace."""
    chinese = '[\u4e00-\u9fa5a-zA-Z0-9]+'
    str1 = re.findall(chinese, content)
    return ''.join(str1)


def read_file(filename):
    """Read the data file: each line is '<label> <content>'."""
    contents, labels = [], []
    with open(filename, mode='r', encoding='utf-8', errors='ignore') as f:
        for line in f:
            try:
                label, content = line.split(" ")
                if content:
                    content = remove_1a(content)
                    contents.append(list(content))  # character-level tokens
                    labels.append(label)
            except ValueError:
                # skip malformed lines that do not split into exactly two fields
                pass
    return contents, labels


def build_vocab(train_dir, vocab_dir, vocab_size=5000):
    """Build the vocabulary from the training set and save it to vocab_dir."""
    data_train, _ = read_file(train_dir)
    all_data = []
    for content in data_train:
        all_data.extend(content)
    counter = Counter(all_data)
    # keep the most common characters, dropping those that occur 2 times or fewer
    count_pairs = [pair for pair in counter.most_common(vocab_size - 1) if pair[1] > 2]
    words, _ = list(zip(*count_pairs))
    # add a <PAD> token so all texts can be padded to the same length
    words = ['<PAD>'] + list(words)
    with open(vocab_dir, mode='w', encoding='utf-8', errors='ignore') as f:
        f.write('\n'.join(words) + '\n')
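Once the vocabulary file is written, it is usually loaded back and turned into a word-to-id dictionary with the same zip pattern as in section 1. A minimal sketch under that assumption; the read_vocab helper and the file paths are illustrative, not part of the original code:

def read_vocab(vocab_dir):
    """Hypothetical helper: load the saved vocabulary and build a word -> id dictionary."""
    with open(vocab_dir, mode='r', encoding='utf-8', errors='ignore') as f:
        words = [line.strip() for line in f if line.strip()]
    word_to_id = dict(zip(words, range(len(words))))
    return words, word_to_id


if __name__ == '__main__':
    # paths are placeholders; adjust to your own data layout
    build_vocab('data/train.txt', 'data/vocab.txt', vocab_size=5000)
    words, word_to_id = read_vocab('data/vocab.txt')
    # <PAD> was written first, so it maps to id 0
    print(word_to_id['<PAD>'])  # 0

Because <PAD> occupies id 0, padded positions in fixed-length sequences map to index 0, which keeps them easy to mask out later.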