# Several distance / similarity functions for comparing rating dictionaries.
# A more efficient approach is to vectorize the scores and then use the
# distance functions defined in scipy.

from math import sqrt


def euclidean_dis(rating1, rating2):  # Euclidean distance
    """Return the Euclidean distance between two rating dictionaries.

    Both ``rating1`` and ``rating2`` map an item name to a numeric score,
    e.g. ``{'Tiny Times 4': 1.0, 'Zootopia': 5.0}``. Only items present in
    both dictionaries contribute to the distance.

    Returns:
        The square root of the summed squared score differences over the
        common items, or ``-1`` when the two dictionaries share no item.
    """
    common = [key for key in rating1 if key in rating2]
    # No commonly rated item: keep the -1 sentinel callers already expect.
    if not common:
        return -1
    # ``**`` is exponentiation; ``^`` is bitwise XOR and fails on floats.
    squared = sum((rating1[key] - rating2[key]) ** 2 for key in common)
    return sqrt(squared)


def manhattan_dis(rating1, rating2):  # Manhattan distance
    """Return the Manhattan distance between two rating dictionaries.

    Both ``rating1`` and ``rating2`` map an item name to a numeric score,
    e.g. ``{'Tiny Times 4': 1.0, 'Zootopia': 5.0}``. Only items present in
    both dictionaries contribute to the distance.

    Returns:
        The sum of absolute score differences over the common items, or
        ``-1`` when the two dictionaries share no item.
    """
    common = [key for key in rating1 if key in rating2]
    # No commonly rated item: keep the -1 sentinel callers already expect.
    if not common:
        return -1
    return sum(abs(rating1[key] - rating2[key]) for key in common)


def cos_dis(rating1, rating2):  # cosine distance (1 - cosine similarity)
    """Return the cosine distance between two rating dictionaries.

    Both ``rating1`` and ``rating2`` map an item name to a numeric score,
    e.g. ``{'Tiny Times 4': 1.0, 'Zootopia': 5.0}``. The dot product runs
    over the items present in both dictionaries, while each norm uses every
    score of its own dictionary.

    Returns:
        ``1 - cosine_similarity``; ``-1`` when the two dictionaries share no
        item; ``1.0`` when either dictionary has a zero norm, because the
        cosine is undefined there and is treated as "no similarity".
    """
    common = [key for key in rating1 if key in rating2]
    # No commonly rated item: keep the -1 sentinel callers already expect.
    if not common:
        return -1

    # ``**`` is exponentiation; ``^`` is bitwise XOR and fails on floats.
    norm_sq_1 = sum(score ** 2 for score in rating1.values())
    norm_sq_2 = sum(score ** 2 for score in rating2.values())
    denominator = sqrt(norm_sq_1 * norm_sq_2)
    # An all-zero rating vector would otherwise raise ZeroDivisionError.
    if denominator == 0:
        return 1.0

    dot_product = sum(rating1[key] * rating2[key] for key in common)
    return 1 - dot_product / denominator


def pearson_dis(rating1, rating2):  # Pearson correlation
    """Return the Pearson correlation between two rating dictionaries.

    Both ``rating1`` and ``rating2`` map an item name to a numeric score,
    e.g. ``{'Tiny Times 4': 1.0, 'Zootopia': 5.0}``. Only items present in
    both dictionaries are used.

    Returns:
        The correlation coefficient computed with the single-pass formula,
        or ``0`` when there is no common item or when either score series
        has zero variance over the common items.
    """
    sum_xy = 0
    sum_x = 0
    sum_y = 0
    sum_x2 = 0
    sum_y2 = 0
    n = 0
    for key in rating1:
        if key in rating2:
            n += 1
            x = rating1[key]
            y = rating2[key]
            sum_xy += x * y
            sum_x += x
            sum_y += y
            sum_x2 += pow(x, 2)
            sum_y2 += pow(y, 2)

    # Without common items every division by n below would fail.
    if n == 0:
        return 0

    # Clamp at zero: round-off can push these terms slightly negative, which
    # would make sqrt raise ValueError.
    spread_x = max(0.0, sum_x2 - pow(sum_x, 2) / n)
    spread_y = max(0.0, sum_y2 - pow(sum_y, 2) / n)
    denominator = sqrt(spread_x) * sqrt(spread_y)
    if denominator == 0:
        return 0
    return (sum_xy - (sum_x * sum_y) / n) / denominator