推薦系統之矩陣分解及C++實現

本文轉載自查看原文 2014-12-25 10:33 3980 Recommender systems/推薦系統/ 推薦系統/ 矩陣分解/ Data mining/數據挖掘

1.引言

矩陣分解(Matrix Factorization, MF)是傳統推薦系統最為經典的算法，思想來源於數學中的奇異值分解(SVD)，但是與SVD 還是有些不同：從形式上就可以看出，SVD將原始的評分矩陣分解為3個矩陣，而本文要介紹的MF是直接將一個矩陣分解為兩個矩陣，一個包含着Users 的因子向量，另一個包含着Items 的因子向量。

2.原理簡介

假如電影分為三類：動畫片，武打片，紀錄片，而某一部電影對應這三類的隸屬度分別為 0, 0.2, 0.7，可以看出這是一部帶有一些武打成分的紀錄片。現在給定某個用戶對這三類電影的喜歡程度（用0 到1 之間的值表示）分別為 0.1, 0.6, 0.2，可以看出該用戶最喜歡武打片，而不怎么喜歡其他兩種，於是可以預測用戶對剛才的電影打分（喜歡程度）為：0*0.1+0.2*0.6+0.7*0.2 = 0.26

矩陣分解的動機來源於此，因為利用用戶的歷史評分矩陣（參考我的上一篇推薦系統之協同過濾的原理及C++實現），如果能夠得到反映每一用戶的對每個Item喜好的因子向量，同時得到每個Item 屬於每一類的隸屬度向量，利用上面的方法就很容易得出每個用戶對每個Item的預測評分，利用這個評分的高低就可以進行推薦高分的Items給相應的用戶了.

例如這個10*10的歷史評分矩陣A, 可以分解為一個10 * 5 的矩陣 B 乘以一個5 * 10 的矩陣 C ,這樣可以把 B 看做是用戶偏好矩陣，里面包含着用戶對每一類Items 的偏好程度的向量，C 的轉置看作是包含着衡量每一個Item 屬於5類的隸屬度的向量，當然這個 5 可以是自己設定的任意值，但是原則上要求要比原來的矩陣A中的列數或者行數小，起到一個降維的作用。B 和 C的初始值可以隨機初始化，然后B和C相乘得到評分，與歷史真實評分對比，通過梯度下降算法不斷調整B和C中的值，使得B和C相乘后得到的矩陣與真實的歷史評分矩陣之間的差別越小越好，最終得到較好的B 和C 就可以用來預測用戶對任意Item的評分了，更加詳細的解釋參考：Matrix_factorization_techniques_for_recommender_systems.pdf

3.實現

本次實現的是一個帶偏置的矩陣分解，數據集是movielens.rar，已經處理成了矩陣形式

讀取和保存txt數據的頭文件

 // ReadAndWriteData.h -- helpers for loading and saving dense matrices as plain text.
#ifndef READANDWRITEDATA_H
#define READANDWRITEDATA_H
#include <cstdlib>
#include <fstream>
#include <iostream>
#include <string>
#include <vector>

// Retained for source compatibility: code that includes this header may rely
// on unqualified standard-library names.
using namespace std;

// Reads a row x col matrix of whitespace-separated values from a text file.
//
// FilePath  path of the text file to read.
// row, col  dimensions of the matrix; values are consumed in row-major order.
//
// Returns a matrix with `row` rows of `col` value-initialised entries, filled
// with the values read from the file.
//
// Terminates the process with exit(1) when the file cannot be opened or when a
// dimension is negative. If the file holds fewer than row * col parsable
// values, a warning is printed to stderr and the entries that were not read
// keep their value-initialised (zero) content.
template <typename T>
std::vector<std::vector<T> > txtRead(const std::string& FilePath, int row, int col)
{
    std::ifstream input(FilePath.c_str());
    if (!input.is_open())
    {
        std::cerr << "File is not existing, check the path: \n" << FilePath << std::endl;
        std::exit(1);
    }
    if (row < 0 || col < 0)
    {
        std::cerr << "Matrix dimensions must be non-negative: " << row << " x " << col << std::endl;
        std::exit(1);
    }

    typedef typename std::vector<std::vector<T> >::size_type RowIndex;
    typedef typename std::vector<T>::size_type ColIndex;
    const RowIndex rows = static_cast<RowIndex>(row);
    const ColIndex cols = static_cast<ColIndex>(col);

    std::vector<std::vector<T> > data(rows, std::vector<T>(cols, T()));
    for (RowIndex i = 0; i < rows; ++i)
    {
        for (ColIndex j = 0; j < cols; ++j)
        {
            if (!(input >> data[i][j]))
            {
                // Do not silently pretend the read succeeded: report where the
                // data ran out and hand back what was read so far.
                std::cerr << "Warning: " << FilePath << " ended or failed to parse at row " << i
                          << ", column " << j << "; remaining entries are left as zero" << std::endl;
                return data;
            }
        }
    }
    return data;
}

// Writes a matrix to a text file, one row per line with the values of a row
// separated by single spaces, so that the result can be loaded again with
// txtRead.
//
// Matrix  the rows to write; rows may have different lengths and the matrix
//         may be empty.
// dest    path of the file to create or overwrite.
//
// If the file cannot be opened, an error is printed to stderr and nothing is
// written.
template <typename T>
void txtWrite(const std::vector<std::vector<T> >& Matrix, const std::string& dest)
{
    std::ofstream output(dest.c_str());
    if (!output.is_open())
    {
        std::cerr << "Cannot open file for writing: " << dest << std::endl;
        return;
    }

    typedef typename std::vector<std::vector<T> >::size_type RowIndex;
    typedef typename std::vector<T>::size_type ColIndex;
    for (RowIndex i = 0; i < Matrix.size(); ++i)
    {
        for (ColIndex j = 0; j < Matrix[i].size(); ++j)
        {
            // A separator is required: without it adjacent numbers run
            // together and the file cannot be parsed back.
            if (j != 0)
            {
                output << ' ';
            }
            output << Matrix[i][j];
        }
        output << '\n';
    }
}
#endif

評價函數，這里還是采用RMSE來評價

 // Evaluate.h -- evaluation metric for predicted rating matrices.
#ifndef EVALUATE_H
#define EVALUATE_H
#include <cmath>
#include <vector>

// Retained for source compatibility: code that includes this header may rely
// on unqualified standard-library names.
using namespace std;

// Computes the root-mean-square error between predicted and reference ratings.
//
// predict  matrix of predicted ratings.
// test     matrix of reference ratings.
//
// Only cells where BOTH matrices hold a non-zero value take part in the error
// (the code treats zero as "no value"). When the two matrices differ in size,
// only the region present in both is compared.
//
// Returns the RMSE over the compared cells, or 0.0 when no cell qualifies
// (instead of dividing by zero).
//
// Declared inline because it is defined in a header and may be included from
// more than one translation unit.
inline double ComputeRMSE(const std::vector<std::vector<double> >& predict,
                          const std::vector<std::vector<double> >& test)
{
    typedef std::vector<std::vector<double> >::size_type RowIndex;
    typedef std::vector<double>::size_type ColIndex;

    const RowIndex rows = predict.size() < test.size() ? predict.size() : test.size();

    unsigned long comparedCells = 0;
    double squaredErrorSum = 0.0;
    for (RowIndex i = 0; i < rows; ++i)
    {
        const std::vector<double>& predictedRow = predict[i];
        const std::vector<double>& referenceRow = test[i];
        const ColIndex cols =
            predictedRow.size() < referenceRow.size() ? predictedRow.size() : referenceRow.size();
        for (ColIndex j = 0; j < cols; ++j)
        {
            if (predictedRow[j] != 0.0 && referenceRow[j] != 0.0)
            {
                const double diff = referenceRow[j] - predictedRow[j];
                squaredErrorSum += diff * diff;
                ++comparedCells;
            }
        }
    }

    if (comparedCells == 0)
    {
        return 0.0;
    }
    return std::sqrt(squaredErrorSum / comparedCells);
}

#endif

最后是主程序

#include "Evaluate.h"
#include "ReadAndWriteData.h"

#include <algorithm>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <string>
#include <vector>
 8 
 9 using namespace std;  10 
 11 
 // Computes the inner (dot) product of two vectors.
//
// A, B  the vectors to multiply element-wise and sum. They are taken by const
//       reference because this function is called in the innermost training
//       loop, where copying both vectors per call would dominate the runtime.
//
// Returns the sum of A[i] * B[i]. If the lengths differ, only the common
// prefix is used, so no element is read out of bounds; empty input yields 0.
double InnerProduct(const std::vector<double>& A, const std::vector<double>& B)
{
    const std::vector<double>::size_type length = std::min(A.size(), B.size());
    double res = 0.0;
    for (std::vector<double>::size_type i = 0; i < length; ++i)
    {
        res += A[i] * B[i];
    }
    return res;
}

 // Transposes a matrix stored as a vector of rows.
//
// Matrix  the matrix to transpose. The column count is taken from the first
//         row; a row shorter than that leaves the corresponding cells
//         value-initialised, and entries beyond that width are ignored.
//
// Returns a matrix with Matrix[0].size() rows and Matrix.size() columns where
// result[j][i] == Matrix[i][j]. An empty input yields an empty result.
template <typename T>
std::vector<std::vector<T> > Transpose(const std::vector<std::vector<T> >& Matrix)
{
    typedef typename std::vector<T>::size_type Index;

    if (Matrix.empty())
    {
        return std::vector<std::vector<T> >();
    }

    const Index rows = Matrix.size();
    const Index cols = Matrix[0].size();
    std::vector<std::vector<T> > Trans(cols, std::vector<T>(rows, T()));
    // Walk the source row by row so it is read sequentially.
    for (Index i = 0; i < rows; ++i)
    {
        const std::vector<T>& sourceRow = Matrix[i];
        for (Index j = 0; j < cols && j < sourceRow.size(); ++j)
        {
            Trans[j][i] = sourceRow[j];
        }
    }
    return Trans;
}

 38 vector<vector<double> > BiasedMF(vector<vector<double> >  train, double lr, double penalty,  39     int maxItr)  40 {  41     unsigned row = train.size();  42     unsigned col = train[0].size();  43     //計算全局平均分
 44     double avg = 0;  45     int Counter = 0;  46     for (unsigned i = 0; i < row; ++i)  47  {  48         for(unsigned j = 0; j < col; ++j)  49  {  50             if (train[i][j])  51  {  52                 avg += train[i][j];  53                 ++Counter;  54  }  55  }  56  }  57     avg /= Counter;  58     //初始化Items偏置
 59     vector<double> ItemsBias(col,0);  60     vector<vector<double> > Transtrain = Transpose(train);  61     for (unsigned i = 0; i < col; ++i)  62  {  63         int Counter = 0;  64         double sum  = 0;  65         for (unsigned j = 0; j < row; ++j)  66  {  67             if (Transtrain[i][j])  68  {  69                 sum +=  Transtrain[i][j] - avg;  70                 ++Counter;  71  }  72                 
 73  }  74         ItemsBias[i] = sum / (25 + Counter);  75  }  76 
 77     //初始化Users偏置
 78     vector<double> UsersBias(row, 0);  79     for (unsigned i = 0; i < row; ++i)  80  {  81         int Counter = 0;  82         double sum  = 0;  83         for (unsigned j = 0; j < col; ++j)  84  {  85             if (train[i][j])  86  {  87                 sum +=  train[i][j] - avg - ItemsBias[j];  88                 ++Counter;  89  }  90  }  91         UsersBias[i] = sum / (10 + Counter);  92  }  93 
 94     //初始化Users和Items對應的矩陣
 95     unsigned k = 10;  96     vector<vector<double> > predict(row,vector<double>(col, 0));  97     vector<vector<double> > Users(row, vector<double>(k, 0));  98     vector<vector<double> > Items(col, vector<double>(k, 0));  99 
100 
101     //梯度下降迭代
102     double rmse = 100; 103     int it = 0; 104     while(it < maxItr) 105  { 106         for (unsigned i = 0; i < row; ++i) 107  { 108             for (unsigned j = 0; j < col; ++j) 109  { 110                 predict[i][j] = InnerProduct(Users[i],Items[j]) + UsersBias[i] 111                             + ItemsBias[j]; 112  } 113  } 114         double new_rmse = ComputeRMSE(predict, train); 115         if (new_rmse < rmse) 116             rmse = new_rmse; 117         cout << "第 "<< it << "次迭代:" << endl; 118         cout << "rmse is: " << rmse << endl; 119         for (unsigned i = 0; i < row; ++i) 120  { 121             for (unsigned j = 0; j < col; ++j) 122  { 123                 if (train[i][j]) 124  { 125                     double err = train[i][j] - predict[i][j]; 126                     //更新User i 和Item j 的因子向量
127                     for (unsigned t = 0; t < k; ++t) 128  { 129                         double tmp = Users[i][t]; 130                         Users[i][t] += lr *(err * Items[j][t] - penalty * Users[i][t]); 131                         Items[j][t] += lr * (err * tmp - penalty * Items[j][t]); 132  } 133                     //更新User i和Item j的偏差
134                     double tmp =  UsersBias[i] + ItemsBias[j] - avg; 135                     UsersBias[i] += lr * (err - penalty * tmp); 136                     ItemsBias[j] += lr * (err - penalty * tmp); 137  } 138  } 139  } 140         ++it; 141  } 142     return predict; 143 } 144 
145 int main() 146 { 147   
148     string FilePath1("E:\\Matlab code\\recommendation system\\data\\movielens\\train.txt"); 149     string FilePath2("E:\\Matlab code\\recommendation system\\data\\movielens\\test.txt"); 150     
151     int row = 943; 152     int col = 1682; 153     vector<vector<double> > train = txtRead<double>(FilePath1, row, col); 154     vector<vector<double> > predict = BiasedMF(train, 0.001, 0.003,100); 155     txtWrite(predict, "predict.txt"); 156     vector<vector<double> > test = txtRead<double>(FilePath2, 462, 1591); 157     double rmse = ComputeRMSE(predict,test); 158     cout << "ProbeRMSE is " << rmse <<endl; 159     return 0; 160 }

4.運行

下面是運行過程中的截圖，可以看出運行過程中RMSE逐漸減小，表示與真實的歷史評分矩陣差別在減小，由於時間關系沒有運行完，根據以前在Matlab上的運行結果，最終的RMSE應該可以達到0.92左右，當然這只是在訓練集上的RMSE，最終效果要測出在測試集上的RMSE, 要比上一篇講到的基於用戶的協同過濾好一些，關於用戶和Items因子向量的初始化會對結果有一定影響，本文中只是全部初始化為0其實不太好，有興趣的讀者可以自己嘗試其他分布函數來初始化，但是總體上不會有什么太大的影響，有什么問題可以聯系我。

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 矩陣LU分解的MATLAB與C++實現矩陣分解(Matrix Factorization)與推薦系統淺談矩陣分解在推薦系統中的應用拓端tecdat|python機器學習：推薦系統實現（以矩陣分解來協同過濾）推薦系統之協同過濾的原理及C++實現推薦系統之基於二部圖的個性化推薦系統原理及C++實現 SVD++：推薦系統的基於矩陣分解的協同過濾算法的提高基於矩陣分解的推薦算法，簡單入門 Numpy實現SVD矩陣分解自己動手寫一個推薦系統,推薦系統小結,推薦系統：總體介紹、推薦算法、性能比較, 漫談“推薦系統”, 淺談矩陣分解在推薦系統中的應用