# Example predicted tag sequence (BIO scheme). The digit "0" is accepted as an
# alias for the outside tag "O".
pre = "0 0 B_SONG I_SONG I_SONG 0 B_SONG I_SONG I_SONG 0 0 B_SINGER I_SINGER I_SINGER 0 O O O B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O"
# Example gold (reference) tag sequence for the same tokens.
true = "0 0 B_SONG I_SONG I_SONG 0 0 0 0 0 0 B_SINGER I_SINGER I_SINGER 0 O O O B_ALBUM I_ALBUM I_ALBUM O O B_TAG I_TAG I_TAG O"

# (begin_tag, inside_tag) pairs, one per entity type to extract.
tags = [("B_SONG", "I_SONG"), ("B_SINGER", "I_SINGER"), ("B_ALBUM", "I_ALBUM"), ("B_TAG", "I_TAG")]


def find_tag(labels, B_label="B_SONG", I_label="I_SONG"):
    """Locate every entity of one type in a BIO tag sequence.

    An entity starts at a ``B_label`` tag and extends over the run of
    ``I_label`` tags that immediately follows it. It ends at the first tag
    that is not ``I_label`` (an "O", another begin tag, a tag of a different
    entity type) or at the end of the sequence.

    :param labels: whitespace-separated tag string, or a sequence of tags.
        The digit "0" is treated as the outside tag "O" in both cases.
    :param B_label: tag that opens an entity.
    :param I_label: tag that continues an entity.
    :return: list of ``(start_index, length)`` tuples in order of appearance.
    """
    if isinstance(labels, str):
        labels = labels.strip().split()
    # Build a fresh list so the caller's sequence is never mutated.
    labels = ["O" if label == "0" else label for label in labels]

    result = []
    total = len(labels)
    pos = 0
    while pos < total:
        if labels[pos] != B_label:
            pos += 1
            continue
        # Consume the I_label run that follows the begin tag.
        end = pos + 1
        while end < total and labels[end] == I_label:
            end += 1
        # Emitting here (not only on a later "O") keeps entities that run to
        # the end of the sequence as well as single-token entities.
        result.append((pos, end - pos))
        pos = end
    return result


def find_all_tag(labels):
    """Locate the entities of every type listed in ``tags``.

    :param labels: whitespace-separated tag string, or a sequence of tags.
    :return: dict mapping the entity name (the part of the begin tag after the
        first underscore, e.g. "SONG") to its list of ``(start, length)``
        tuples.
    """
    result = {}
    for b_label, i_label in tags:
        result[b_label.split("_")[1]] = find_tag(labels, B_label=b_label, I_label=i_label)
    return result


res = find_all_tag(pre)
結果:
{'ALBUM': [(18, 3)], 'SINGER': [(11, 3)], 'SONG': [(2, 3), (6, 3)], 'TAG': [(23, 3)]}
接下來計算精確率precision、召回率(查全率)recall、F1:
def _normalize_labels(labels):
    """Return ``labels`` as a new list, mapping the digit "0" to the tag "O"."""
    if isinstance(labels, str):
        labels = labels.strip().split()
    return ["O" if label == "0" else label for label in labels]


def _entity_spans(labels):
    """Return the set of ``(entity_name, start, length)`` triples in ``labels``."""
    return {
        (name, start, length)
        for name, spans in find_all_tag(labels).items()
        for start, length in spans
    }


def precision(pre_labels, true_labels):
    """Entity-level precision of a predicted tag sequence.

    A predicted entity counts as correct only when the gold sequence contains
    an entity of the same type with the same start and the same length, so a
    prediction that covers just part of a longer gold entity is not a hit.

    :param pre_labels: predicted tags, as a whitespace-separated string or a list.
    :param true_labels: gold tags, as a whitespace-separated string or a list.
    :return: correct predicted entities / predicted entities, or 0.0 when
        nothing was predicted.
    """
    predicted = _entity_spans(_normalize_labels(pre_labels))
    if not predicted:
        # No predicted entity: avoid dividing by zero.
        return 0.0
    gold = _entity_spans(_normalize_labels(true_labels))
    return len(predicted & gold) / len(predicted)


def recall(pre_labels, true_labels):
    """Entity-level recall of a predicted tag sequence.

    Mirrors ``precision`` but iterates over the gold entities: a gold entity
    counts as found only when the prediction contains the identical span.

    :param pre_labels: predicted tags, as a whitespace-separated string or a list.
    :param true_labels: gold tags, as a whitespace-separated string or a list.
    :return: gold entities that were predicted / gold entities, or 0.0 when
        the gold sequence has no entity.
    """
    gold = _entity_spans(_normalize_labels(true_labels))
    if not gold:
        # No gold entity: avoid dividing by zero.
        return 0.0
    predicted = _entity_spans(_normalize_labels(pre_labels))
    return len(predicted & gold) / len(gold)


def f1_score(precision, recall):
    """Harmonic mean of precision and recall.

    :param precision: precision value.
    :param recall: recall value.
    :return: ``2 * P * R / (P + R)``, or 0.0 when ``P + R`` is zero.
    """
    denominator = precision + recall
    if denominator == 0:
        return 0.0
    return (2 * precision * recall) / denominator


if __name__ == '__main__':
    # Distinct result names keep the metric functions from being rebound.
    precision_value = precision(pre, true)
    recall_value = recall(pre, true)
    f1 = f1_score(precision_value, recall_value)
    print(precision_value)
    print(recall_value)
    print(f1)
結果:
0.8
1.0
0.888888888888889
參考:http://www.manongjc.com/detail/15-ochyrivhdccrvka.html