Mahout 學習代碼示例


一, Introduction

復制代碼
package mia.recommender.ch02;
//=分析導入包可以看出mahout的包分為主要類以及它們的實現類=
import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import java.util.*;

/**
 * Minimal user-based recommender walkthrough: loads ratings from
 * {@code intro.csv}, builds a Pearson-similarity recommender over the
 * 2 nearest neighbours, and prints one recommendation for user 1.
 */
class RecommenderIntro {

    public static void main(String[] args) throws Exception {
        // DataModel is Mahout's representation of the <user, item, rating>
        // triples; here it is built from a CSV file.
        File ratingsFile = new File("intro.csv");
        DataModel model = new FileDataModel(ratingsFile);

        // User-based CF first needs similar users, so pick the similarity
        // measure and the neighbourhood size.
        UserSimilarity pearson = new PearsonCorrelationSimilarity(model);
        UserNeighborhood nearestTwo = new NearestNUserNeighborhood(2, pearson, model);

        // With the neighbourhood defined, a plain user-based recommender can be built.
        Recommender recommender = new GenericUserBasedRecommender(model, nearestTwo, pearson);

        // Recommend 1 item to user 1. The original author's note records the
        // output as: RecommendedItem[item:104, value:4.257081]
        List<RecommendedItem> topItems = recommender.recommend(1, 1);
        Iterator<RecommendedItem> cursor = topItems.iterator();
        while (cursor.hasNext()) {
            System.out.println(cursor.next());
        }
    }
}
復制代碼

二, Evaluation

復制代碼
package mia.recommender.ch02;

import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import java.util.*;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based recommender on {@code intro.csv} using the average
 * absolute difference between estimated and actual preference values.
 *
 * @author wentingtu <wentingtu09 at gmail dot com>
 */
public class RecommenderEvalu {

    public static void main(String[] args) throws IOException, TasteException {
        // Use the fixed test seed so the training/test split is repeatable.
        RandomUtils.useTestSeed();

        File ratingsFile = new File("intro.csv");
        DataModel ratings = new FileDataModel(ratingsFile);

        // Metric: sum(|estimated - actual|) / number of values.
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();

        // RecommenderBuilder tells the evaluator how to construct the
        // recommender under test from whatever DataModel it is handed.
        RecommenderBuilder userBasedBuilder = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel model) throws TasteException {
                UserSimilarity pearson = new PearsonCorrelationSimilarity(model);
                UserNeighborhood nearestTwo = new NearestNUserNeighborhood(2, pearson, model);
                return new GenericUserBasedRecommender(model, nearestTwo, pearson);
            }
        };

        // Arguments: builder, no DataModelBuilder, the data, training/total
        // ratio (0.7), evaluated-data/total ratio (1.0).
        // The original author's note records the printed score as 1.0.
        System.out.println(evaluator.evaluate(userBasedBuilder, null, ratings, 0.7, 1.0));
    }
}
復制代碼

 

復制代碼
package mia.recommender.ch02;

import org.apache.mahout.cf.taste.impl.model.file.*;
import org.apache.mahout.cf.taste.impl.neighborhood.*;
import org.apache.mahout.cf.taste.impl.recommender.*;
import org.apache.mahout.cf.taste.impl.similarity.*;
import org.apache.mahout.cf.taste.model.*;
import org.apache.mahout.cf.taste.neighborhood.*;
import org.apache.mahout.cf.taste.recommender.*;
import org.apache.mahout.cf.taste.similarity.*;
import java.io.*;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.IRStatistics;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderIRStatsEvaluator;
import org.apache.mahout.cf.taste.impl.eval.GenericRecommenderIRStatsEvaluator;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based recommender on {@code intro.csv} with
 * information-retrieval metrics (precision and recall at 2).
 *
 * @author Administrator
 */
public class RecommenderEvaluPrecisionRecall {

    public static void main(String[] args) throws IOException, TasteException {
        RandomUtils.useTestSeed();

        File ratingsFile = new File("intro.csv");
        DataModel model = new FileDataModel(ratingsFile);

        RecommenderIRStatsEvaluator evaluator = new GenericRecommenderIRStatsEvaluator();

        RecommenderBuilder recommenderBuilder = new RecommenderBuilder() {
            @Override
            public Recommender buildRecommender(DataModel trainingModel) throws TasteException {
                UserSimilarity pearson = new PearsonCorrelationSimilarity(trainingModel);
                return new GenericUserBasedRecommender(
                        trainingModel,
                        new NearestNUserNeighborhood(2, pearson, trainingModel),
                        pearson);
            }
        };

        // "At 2" means precision and recall are computed over the top-2
        // recommendations. A recommendation counts as "good" when its real
        // preference value exceeds a threshold; CHOOSE_THRESHOLD selects
        // threshold = the user's average preference value plus one standard
        // deviation.
        final int at = 2;
        final double relevanceThreshold = GenericRecommenderIRStatsEvaluator.CHOOSE_THRESHOLD;
        IRStatistics stats = evaluator.evaluate(
                recommenderBuilder, null, model, null, at, relevanceThreshold, 1.0);

        // The original author's note records the output as 0.75 and 1.0.
        double precision = stats.getPrecision();
        System.out.println(precision);
        double recall = stats.getRecall();
        System.out.println(recall);
    }
}
復制代碼

三,Set preference

復制代碼
package mia.recommender.ch03;

import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;

/**
 * Shows how to populate a {@code PreferenceArray} for one user and read a
 * single {@code Preference} back out of it.
 *
 * @author Administrator
 */
public class SetPrefinPreferenceArray {

    /**
     * Fills a two-entry preference array for user 1 and fetches the second entry.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) {
        // Parallel tables: entry i is (itemIDs[i], ratings[i]).
        long[] itemIDs = {101L, 102L};
        float[] ratings = {2.0f, 3.0f};

        PreferenceArray user1Prefs = new GenericUserPreferenceArray(itemIDs.length);
        // The user ID is set once, through index 0, before any item is filled in.
        user1Prefs.setUserID(0, 1L);
        for (int slot = 0; slot < itemIDs.length; slot++) {
            user1Prefs.setItemID(slot, itemIDs[slot]);
            user1Prefs.setValue(slot, ratings[slot]);
        }

        // Materialise the second entry (item 102) as a Preference object.
        Preference pref = user1Prefs.get(1);
    }
}
復制代碼

四,User-based CF

復制代碼
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a user-based collaborative-filtering recommender whose similarity
 * measure and neighbourhood type are chosen by the experiment parameters
 * declared inside the {@code RecommenderBuilder}.
 *
 * @author Administrator
 */
public class UserBasedCF {

    /**
     * Builds the configured user-based recommender and prints its average
     * absolute difference score for the given data.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        // Seed first, then construct the evaluator, matching the order used by
        // the other evaluation examples in this file.
        // NOTE(review): some Mahout versions appear to capture their Random at
        // evaluator construction time — confirm for the version in use.
        RandomUtils.useTestSeed();
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();

        RecommenderBuilder builder = new RecommenderBuilder() {
            // ============= experiment parameters =============
            // 1. Neighbourhood: K-nearest ('K', uses k) or threshold ('T', uses threshold).
            // 2. Similarity: Euclidean ('E'), Pearson ('P'), Log-likelihood ('L'), Tanimoto ('T').
            char similarityPattern = 'E'; // 'E' or 'P' or 'L' or 'T'
            char neighborhoodPattern = 'K'; // 'K' or 'T'
            int k = 2;
            double threshold = 0.5;

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                // Every case ends with break so that exactly the selected
                // measure is used; without it control falls through to the
                // last case and the selection is ignored.
                UserSimilarity similarity;
                switch (similarityPattern) {
                    case 'E':
                        similarity = new EuclideanDistanceSimilarity(dm);
                        break;
                    case 'P':
                        similarity = new PearsonCorrelationSimilarity(dm);
                        break;
                    case 'L':
                        similarity = new LogLikelihoodSimilarity(dm);
                        break;
                    case 'T':
                        similarity = new TanimotoCoefficientSimilarity(dm);
                        break;
                    default:
                        // Fail loudly instead of handing a null similarity to the recommender.
                        throw new IllegalStateException(
                                "Unknown similarityPattern: " + similarityPattern);
                }

                UserNeighborhood neighborhood;
                switch (neighborhoodPattern) {
                    case 'K':
                        neighborhood = new NearestNUserNeighborhood(k, similarity, dm);
                        break;
                    case 'T':
                        neighborhood = new ThresholdUserNeighborhood(threshold, similarity, dm);
                        break;
                    default:
                        throw new IllegalStateException(
                                "Unknown neighborhoodPattern: " + neighborhoodPattern);
                }
                return new GenericUserBasedRecommender(dm, neighborhood, similarity);
            }
        };

        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * Loads {@code data/dating/ratings.dat} and runs the evaluation.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}
復制代碼

五,Item-based CF

復制代碼
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.GenericItemBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.LogLikelihoodSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates an item-based collaborative-filtering recommender whose
 * similarity measure is chosen by the experiment parameter declared inside
 * the {@code RecommenderBuilder}.
 *
 * @author Administrator
 */
public class ItemBasedCF {

    /**
     * Builds the configured item-based recommender and prints its average
     * absolute difference score for the given data.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        // Seed first, then construct the evaluator, matching the order used by
        // the other evaluation examples in this file.
        // NOTE(review): some Mahout versions appear to capture their Random at
        // evaluator construction time — confirm for the version in use.
        RandomUtils.useTestSeed();
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();

        RecommenderBuilder builder = new RecommenderBuilder() {
            // ============= experiment parameters =============
            // Similarity: Euclidean ('E'), Pearson ('P'), Log-likelihood ('L'), Tanimoto ('T').
            char similarityPattern = 'E'; // 'E' or 'P' or 'L' or 'T'

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                // Every case ends with break so that exactly the selected
                // measure is used; without it control falls through to the
                // last case and the selection is ignored.
                ItemSimilarity similarity;
                switch (similarityPattern) {
                    case 'E':
                        similarity = new EuclideanDistanceSimilarity(dm);
                        break;
                    case 'P':
                        similarity = new PearsonCorrelationSimilarity(dm);
                        break;
                    case 'L':
                        similarity = new LogLikelihoodSimilarity(dm);
                        break;
                    case 'T':
                        similarity = new TanimotoCoefficientSimilarity(dm);
                        break;
                    default:
                        // Fail loudly instead of handing a null similarity to the recommender.
                        throw new IllegalStateException(
                                "Unknown similarityPattern: " + similarityPattern);
                }
                return new GenericItemBasedRecommender(dm, similarity);
            }
        };

        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * Loads {@code data/dating/ratings.dat} and runs the evaluation.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}
復制代碼


六,Slope one CF

復制代碼
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.common.Weighting;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.MemoryDiffStorage;
import org.apache.mahout.cf.taste.impl.recommender.slopeone.SlopeOneRecommender;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.recommender.slopeone.DiffStorage;
import org.apache.mahout.common.RandomUtils;

/**
 * Evaluates a weighted slope-one recommender backed by an in-memory diff
 * storage.
 *
 * @author Administrator
 */
public class SlopeOneCF {

    /**
     * Builds the slope-one recommender and prints its average absolute
     * difference score for the given data.
     *
     * @param model the preference data to evaluate on
     * @throws TasteException if building or evaluating the recommender fails
     */
    public static void recommenderModelEvaluation(DataModel model) throws TasteException {
        // Seed first, then construct the evaluator, matching the order used by
        // the other evaluation examples in this file.
        // NOTE(review): some Mahout versions appear to capture their Random at
        // evaluator construction time — confirm for the version in use.
        RandomUtils.useTestSeed();
        RecommenderEvaluator evaluator = new AverageAbsoluteDifferenceRecommenderEvaluator();

        RecommenderBuilder builder = new RecommenderBuilder() {

            // Passed as the last MemoryDiffStorage constructor argument;
            // presumably the maximum number of diff entries kept — TODO confirm.
            long diffStorageNb = 100000;

            @Override
            public Recommender buildRecommender(DataModel dm) throws TasteException {
                DiffStorage diffStorage =
                        new MemoryDiffStorage(dm, Weighting.WEIGHTED, diffStorageNb);
                return new SlopeOneRecommender(
                        dm, Weighting.WEIGHTED, Weighting.WEIGHTED, diffStorage);
            }
        };

        // Actually run the evaluation and report the score, using the same
        // 0.7 / 1.0 split arguments as the user- and item-based examples.
        double score = evaluator.evaluate(builder, null, model, 0.7, 1.0);
        System.out.println(score);
    }

    /**
     * Loads {@code data/dating/ratings.dat} and runs the evaluation.
     *
     * @param args the command line arguments
     */
    public static void main(String[] args) throws IOException, TasteException {
        DataModel model = new FileDataModel(new File("data/dating/ratings.dat"));
        recommenderModelEvaluation(model);
    }
}
復制代碼

七,一個示例

復制代碼
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import java.util.List;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.model.GenericUserPreferenceArray;
import org.apache.mahout.cf.taste.impl.model.PlusAnonymousUserDataModel;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;

/**
 * A {@link LibimsetiRecommender} that can also recommend for an anonymous
 * user, i.e. one who is not in the data model and is described only by a
 * temporary {@code PreferenceArray}.
 *
 * @author Administrator
 */
public class LibimsetiWithAnonymousRecommender extends LibimsetiRecommender {

    private final PlusAnonymousUserDataModel plusAnonymousModel;

    /**
     * Creates the recommender over {@code data/dating/ratings.dat}.
     *
     * @throws TasteException if the underlying recommender cannot be built
     * @throws IOException if a data file cannot be read
     */
    public LibimsetiWithAnonymousRecommender()
            throws TasteException, IOException {
        this((DataModel) new FileDataModel(new File("data/dating/ratings.dat")));
    }

    /**
     * Creates the recommender over the given data, wrapped so that one
     * temporary anonymous user can be added to it.
     *
     * @param model the preference data of the known users
     * @throws TasteException if the underlying recommender cannot be built
     * @throws IOException if a data file cannot be read
     */
    public LibimsetiWithAnonymousRecommender(DataModel model)
            throws TasteException, IOException {
        // Hand the parent a model that wraps the real one.
        super(new PlusAnonymousUserDataModel(model));
        // Retrieve that wrapper back from the parent so temp prefs can be set on it.
        plusAnonymousModel =
                (PlusAnonymousUserDataModel) getDataModel();
    }

    /**
     * Recommends items for an anonymous user.
     *
     * <p>The anonymous preferences are attached to the model under
     * {@code PlusAnonymousUserDataModel.TEMP_USER_ID} for the duration of the
     * call and are always removed again, even when recommending fails.
     * The method is synchronized, presumably because the model holds a single
     * set of temporary preferences shared by all callers.
     *
     * @param anonymousUserPrefs the anonymous user's ratings
     * @param topN how many recommendations to return
     * @return the recommendations for the anonymous user
     * @throws TasteException if the underlying recommender fails
     */
    public synchronized List<RecommendedItem> recommend(
            PreferenceArray anonymousUserPrefs, int topN)
            throws TasteException {
        plusAnonymousModel.setTempPrefs(anonymousUserPrefs);
        try {
            // Delegate to the parent's three-argument recommend with no
            // rescorer, using TEMP_USER_ID in place of a real user ID.
            return recommend(PlusAnonymousUserDataModel.TEMP_USER_ID, topN, null);
        } finally {
            // Detach the anonymous user's data no matter how the call ended,
            // so a failure cannot leave stale temp prefs on the model.
            plusAnonymousModel.clearTempPrefs();
        }
    }

    /**
     * Creates sample preferences for an anonymous user.
     *
     * @return a three-entry preference array for {@code TEMP_USER_ID}
     */
    public PreferenceArray creatAnAnonymousPrefs() {
        PreferenceArray anonymousPrefs =
                new GenericUserPreferenceArray(3);
        anonymousPrefs.setUserID(0, PlusAnonymousUserDataModel.TEMP_USER_ID);
        // NOTE(review): all three entries use item ID 123L with different
        // values; this looks like placeholder data — confirm the intended IDs.
        anonymousPrefs.setItemID(0, 123L);
        anonymousPrefs.setValue(0, 1.0f);
        anonymousPrefs.setItemID(1, 123L);
        anonymousPrefs.setValue(1, 3.0f);
        anonymousPrefs.setItemID(2, 123L);
        anonymousPrefs.setValue(2, 2.0f);
        return anonymousPrefs;
    }

    public static void main(String[] args) throws Exception {
        LibimsetiWithAnonymousRecommender recommender =
                new LibimsetiWithAnonymousRecommender();
        List<RecommendedItem> recommendations =
                recommender.recommend(recommender.creatAnAnonymousPrefs(), 10);
        System.out.println(recommendations);
    }
}
復制代碼

 

復制代碼
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import java.util.Collection;
import java.util.List;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;

/**
 * A custom recommender for the dating data set: a plain user-based CF
 * recommender whose results are filtered by a gender-aware
 * {@link GenderRescorer}.
 *
 * @author Administrator
 */
public class LibimsetiRecommender implements Recommender {

    // The pure-CF recommender every call is delegated to.
    private final Recommender libimsetiRecommender;
    private final DataModel model;
    // Profile IDs read from gender.dat, split by gender.
    private final FastIDSet men;
    private final FastIDSet women;

    /** Creates the recommender over {@code data/dating/ratings.dat}. */
    public LibimsetiRecommender() throws TasteException, IOException {
        this((DataModel) new FileDataModel(new File("data/dating/ratings.dat")));
    }

    /**
     * Builds the underlying pure-CF recommender (Euclidean similarity,
     * 2 nearest neighbours) over {@code model} and loads the gender sets
     * from {@code gender.dat}.
     *
     * @param model the preference data
     */
    public LibimsetiRecommender(DataModel model) throws TasteException, IOException {
        this.model = model;
        UserSimilarity euclidean = new EuclideanDistanceSimilarity(model);
        UserNeighborhood nearestTwo = new NearestNUserNeighborhood(2, euclidean, model);
        libimsetiRecommender = new GenericUserBasedRecommender(model, nearestTwo, euclidean);

        File genderFile = new File("gender.dat");
        FastIDSet[] byGender = GenderRescorer.generateMenWomen(genderFile);
        men = byGender[0];
        women = byGender[1];
    }

    /** Creates the gender filter for the given user. */
    private IDRescorer newGenderRescorer(long userID) throws TasteException {
        return new GenderRescorer(men, women, userID, model);
    }

    /** Recommends with the gender-based rescorer applied. */
    public List<RecommendedItem> recommend(long userID, int topN) throws TasteException {
        return libimsetiRecommender.recommend(userID, topN, newGenderRescorer(userID));
    }

    /** Recommends with a caller-supplied rescorer instead of the gender one. */
    public List<RecommendedItem> recommend(long userID, int topN, IDRescorer idr) throws TasteException {
        List<RecommendedItem> recommended = libimsetiRecommender.recommend(userID, topN, idr);
        return recommended;
    }

    /**
     * Estimates a preference and passes it through the gender rescorer,
     * because the gender filter also shapes what this recommender returns.
     */
    public float estimatePreference(long userID, long itemID) throws TasteException {
        IDRescorer genderFilter = newGenderRescorer(userID);
        float rawEstimate = libimsetiRecommender.estimatePreference(userID, itemID);
        double adjusted = genderFilter.rescore(itemID, rawEstimate);
        return (float) adjusted;
    }

    /** Delegates directly to the underlying recommender. */
    public void setPreference(long userID, long itemID, float value) throws TasteException {
        libimsetiRecommender.setPreference(userID, itemID, value);
    }

    /** Delegates directly to the underlying recommender. */
    public void removePreference(long userID, long itemID) throws TasteException {
        libimsetiRecommender.removePreference(userID, itemID);
    }

    /** Returns the underlying recommender's data model. */
    public DataModel getDataModel() {
        DataModel delegateModel = libimsetiRecommender.getDataModel();
        return delegateModel;
    }

    /** Delegates directly to the underlying recommender. */
    public void refresh(Collection<Refreshable> alreadyRefreshed) {
        libimsetiRecommender.refresh(alreadyRefreshed);
    }
}
復制代碼

 

復制代碼
package mia.recommender.ch05;

import java.io.File;
import java.io.IOException;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.apache.mahout.cf.taste.recommender.IDRescorer;
import org.apache.mahout.common.iterator.FileLineIterable;

/**
 * Rescorer that filters out recommended profiles of the gender a user does
 * not appear to be interested in, judged from the profiles the user rated.
 *
 * @author Administrator
 */
public class GenderRescorer implements IDRescorer {

    private final FastIDSet men;   // all male profile IDs
    private final FastIDSet women; // all female profile IDs
    // Per-instance memo of users already classified. Both sets are created
    // empty in the constructor immediately before the only classification call.
    private final FastIDSet usersRateMoreMen;
    private final FastIDSet usersRateLessMen;
    // True when the user rated more male than female profiles.
    private final boolean likeMen;

    /**
     * @param men male profile IDs
     * @param women female profile IDs
     * @param userID the user recommendations are being filtered for
     * @param model source of that user's preferences
     * @throws TasteException if the user's preferences cannot be read
     */
    public GenderRescorer(
            FastIDSet men,
            FastIDSet women,
            long userID, DataModel model)
            throws TasteException {
        this.men = men;
        this.women = women;
        this.usersRateMoreMen = new FastIDSet();
        this.usersRateLessMen = new FastIDSet();
        this.likeMen = ratesMoreMen(userID, model);
    }

    /**
     * Reads a gender file with lines of the form {@code profileID,G...} and
     * splits the profile IDs into men and women. Lines whose gender character
     * is {@code 'U'} are skipped; {@code 'M'} goes to men and any other
     * character goes to women.
     *
     * @param genderFile the file to read
     * @return a two-element array: index 0 is the men set, index 1 the women set
     * @throws IOException if the file cannot be read
     */
    public static FastIDSet[] generateMenWomen(File genderFile)
            throws IOException {
        FastIDSet maleIDs = new FastIDSet(50000);
        FastIDSet femaleIDs = new FastIDSet(50000);
        for (String record : new FileLineIterable(genderFile)) {
            int separator = record.indexOf(',');
            char genderCode = record.charAt(separator + 1);
            if (genderCode != 'U') {
                long profileID = Long.parseLong(record.substring(0, separator));
                FastIDSet target = (genderCode == 'M') ? maleIDs : femaleIDs;
                target.add(profileID);
            }
        }
        maleIDs.rehash();
        femaleIDs.rehash();
        return new FastIDSet[] {maleIDs, femaleIDs};
    }

    /**
     * Decides whether the user prefers men by counting the genders of the
     * profiles the user has rated; profiles of unknown gender are not counted.
     */
    private boolean ratesMoreMen(long userID, DataModel model)
            throws TasteException {
        if (usersRateMoreMen.contains(userID)) {
            return true;
        }
        if (usersRateLessMen.contains(userID)) {
            return false;
        }

        PreferenceArray rated = model.getPreferencesFromUser(userID);
        int maleRatings = 0;
        int femaleRatings = 0;
        for (int idx = 0; idx < rated.length(); idx++) {
            long ratedProfile = rated.get(idx).getItemID();
            if (men.contains(ratedProfile)) {
                maleRatings++;
            } else if (women.contains(ratedProfile)) {
                femaleRatings++;
            }
        }

        // A tie counts as "does not rate more men".
        boolean prefersMen = maleRatings > femaleRatings;
        FastIDSet memo = prefersMen ? usersRateMoreMen : usersRateLessMen;
        memo.add(userID);
        return prefersMen;
    }

    /** Returns {@code NaN} for filtered profiles and the original score otherwise. */
    public double rescore(long profileID, double originalScore) {
        if (isFiltered(profileID)) {
            return Double.NaN;
        }
        return originalScore;
    }

    /**
     * A profile is filtered when its gender is the opposite of the one the
     * user prefers: women for a user who likes men, men otherwise.
     */
    public boolean isFiltered(long profileID) {
        FastIDSet unwanted = likeMen ? women : men;
        return unwanted.contains(profileID);
    }
}
復制代碼

 

復制代碼
package mia.recommender.ch05;

import java.util.Collection;
import org.apache.mahout.cf.taste.common.Refreshable;
import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.impl.common.FastIDSet;
import org.apache.mahout.cf.taste.similarity.ItemSimilarity;

/**
 * Item similarity based purely on gender: two profiles of the same known
 * gender are maximally similar (1.0), two of different known genders are
 * maximally dissimilar (-1.0), and if either gender is unknown the
 * similarity is 0.0.
 *
 * @author Administrator
 */
public class GenderItemSimilarity implements ItemSimilarity {
    private final FastIDSet men;
    private final FastIDSet women;

    /**
     * @param men male profile IDs
     * @param women female profile IDs
     */
    public GenderItemSimilarity(FastIDSet men, FastIDSet women) {
        this.men = men;
        this.women = women;
    }

    /**
     * @return 1.0 for same gender, -1.0 for different gender, 0.0 when the
     *         gender of either profile is unknown
     */
    public double itemSimilarity(long profileID1, long profileID2) throws TasteException {
        Boolean profile1IsMan = isMan(profileID1);
        if (profile1IsMan == null) {
            return 0.0;
        }
        Boolean profile2IsMan = isMan(profileID2);
        if (profile2IsMan == null) {
            return 0.0;
        }
        // Compare by value: == on boxed Booleans compares references and is
        // only correct while both operands are the canonical constants.
        return profile1IsMan.equals(profile2IsMan) ? 1.0 : -1.0;
    }

    /** Returns TRUE for a man, FALSE for a woman, and null when the gender is unknown. */
    private Boolean isMan(long profileID) {
        if (men.contains(profileID)) {
            return Boolean.TRUE;
        }
        if (women.contains(profileID)) {
            return Boolean.FALSE;
        }
        return null;
    }

    /** Computes {@link #itemSimilarity} of {@code itemID1} against each ID in {@code itemID2s}. */
    public double[] itemSimilarities(long itemID1, long[] itemID2s) throws TasteException {
        double[] result = new double[itemID2s.length];
        for (int i = 0; i < itemID2s.length; i++) {
            result[i] = itemSimilarity(itemID1, itemID2s[i]);
        }
        return result;
    }

    /** Not implemented: this class does not enumerate similar items. */
    public long[] allSimilarItemIDs(long l) throws TasteException {
        throw new UnsupportedOperationException("Not supported yet.");
    }

    /**
     * Does nothing. This class keeps only the two ID sets handed to its
     * constructor and derives nothing from them, so there is nothing to
     * reload; returning quietly lets a caller's refresh pass complete.
     */
    public void refresh(Collection<Refreshable> clctn) {
        // Intentionally empty.
    }

}
復制代碼




免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM