Mahout基於使用者的協同過濾演算法的例子
每行測試資料以逗號分隔,依序為四個欄位:使用者id(uid)、物品id(itemid)、評分(rating)、評分時間(time,Unix 時間戳,單位為秒)
3464,2502,3,973282547
3464,3160,2,973282494
3464,2505,3,967175070
3464,1703,2,967248043
3464,1704,5,967246680
3464,3163,1,967174266
3464,2369,4,973282339
3464,1569,4,967247436
3464,896,3,967247557
3464,3316,3,973282934
3464,2517,3,967174139
3464,3174,4,967174266
3464,3175,2,973282421
3464,3176,3,967174298
3464,1573,3,967247865
3464,3178,4,967247587
3464,105,3,967248019
3464,3325,4,973282547
3464,1721,3,967247042
3464,3327,4,973282892
3464,3185,3,967174298
3464,1727,4,967248268
3464,111,5,967174438
3464,3186,4,967242949
3464,1729,3,967247165
3464,1584,3,967247078
3464,2387,3,967247884
3464,2389,4,967175256
3464,1589,4,967248019
3464,1732,4,967247306
3464,2391,4,967246935
3464,2395,4,973282625
3464,2396,5,967246752
3464,1597,4,967174960
3464,2541,3,967247865
package userBased;
import java.io.File;
import java.util.List;
import org.apache.mahout.cf.taste.impl.model.file.FileDataModel;
import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood;
import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender;
import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood;
import org.apache.mahout.cf.taste.recommender.RecommendedItem;
import org.apache.mahout.cf.taste.recommender.Recommender;
import org.apache.mahout.cf.taste.similarity.UserSimilarity;
/**
 * Example of user-based collaborative filtering with Apache Mahout (Taste).
 *
 * <p>Usage: {@code UserBased [ratingsFile [userId [howMany]]]}. Every argument is
 * optional; when one is omitted the value that used to be hard-coded in this
 * example is used instead, so running the class without arguments behaves as before.
 *
 * <p>The ratings file is handed to {@code FileDataModel}; the sample data for this
 * example has one preference per line in the form
 * {@code userId,itemId,rating,timestamp}.
 */
public class UserBased {

    /** Ratings file read when no path is given on the command line. */
    private static final String DEFAULT_RATINGS_PATH = "F:/ml-1m/ratings.dat";

    /** User to produce recommendations for when no user id is given. */
    private static final long DEFAULT_USER_ID = 100L;

    /** Number of items to recommend when no count is given. */
    private static final int DEFAULT_HOW_MANY = 2;

    /**
     * Number of most-similar users that form the neighborhood. A larger neighborhood
     * takes more users into account per recommendation and costs more to compute.
     */
    private static final int NEIGHBORHOOD_SIZE = 500;

    /**
     * Builds a user-based recommender over the ratings file and prints the
     * recommendations for one user, one {@code RecommendedItem} per line.
     *
     * @param args optional: {@code args[0]} ratings file path, {@code args[1]} user id,
     *             {@code args[2]} number of items to recommend
     * @throws NumberFormatException if a supplied user id or count is not a valid number
     * @throws Exception if the data model cannot be loaded or Mahout fails while
     *                   computing recommendations
     */
    public static void main(String[] args) throws Exception {
        String ratingsPath = hasArg(args, 0) ? args[0] : DEFAULT_RATINGS_PATH;
        long userId = hasArg(args, 1) ? Long.parseLong(args[1]) : DEFAULT_USER_ID;
        int howMany = hasArg(args, 2) ? Integer.parseInt(args[2]) : DEFAULT_HOW_MANY;

        DataModel model = new FileDataModel(new File(ratingsPath));

        // Choice of user similarity (notes carried over from the original example):
        //   preference data that contains ratings:
        //     EuclideanDistanceSimilarity, PearsonCorrelationSimilarity,
        //     UncenteredCosineSimilarity
        //   preference data without ratings:
        //     CityBlockSimilarity, LogLikelihoodSimilarity
        UserSimilarity similarity = new PearsonCorrelationSimilarity(model);

        // Choice of user neighborhood:
        //   NearestNUserNeighborhood(n, similarity, model)
        //     keeps the n users most similar to the target user.
        //   ThresholdUserNeighborhood(threshold, similarity, model)
        //     keeps every user whose similarity to the target user meets or exceeds
        //     the threshold; the threshold is a similarity value, not a share of users.
        UserNeighborhood neighborhood =
                new NearestNUserNeighborhood(NEIGHBORHOOD_SIZE, similarity, model);

        // The recommender combines the data model, the neighborhood and the similarity.
        Recommender recommender =
                new GenericUserBasedRecommender(model, neighborhood, similarity);

        List<RecommendedItem> recommendations = recommender.recommend(userId, howMany);
        for (RecommendedItem recommendation : recommendations) {
            System.out.println(recommendation);
        }
    }

    /** Returns whether {@code args} is non-null and has an element at {@code index}. */
    private static boolean hasArg(String[] args, int index) {
        return args != null && args.length > index;
    }
}