1. 程式人生 > Mahout--最基本的推薦系統的JAVA程式碼

Mahout--最基本的推薦系統的JAVA程式碼

package mp05.com;
import java.io.File;
import java.io.IOException;
import java.util.List;

import org.apache.mahout.cf.taste.common.TasteException;
import org.apache.mahout.cf.taste.eval.RecommenderBuilder;
import org.apache.mahout.cf.taste.eval.RecommenderEvaluator;
import org.apache.mahout.cf.taste
.impl.eval.AverageAbsoluteDifferenceRecommenderEvaluator; import org.apache.mahout.cf.taste.impl.model.file.FileDataModel; import org.apache.mahout.cf.taste.impl.neighborhood.NearestNUserNeighborhood; import org.apache.mahout.cf.taste.impl.neighborhood.ThresholdUserNeighborhood; import org.apache.mahout
.cf.taste.impl.recommender.GenericItemBasedRecommender; import org.apache.mahout.cf.taste.impl.recommender.GenericUserBasedRecommender; import org.apache.mahout.cf.taste.impl.similarity.EuclideanDistanceSimilarity; import org.apache.mahout.cf.taste.impl.similarity.PearsonCorrelationSimilarity; import org.apache
.mahout.cf.taste.impl.similarity.TanimotoCoefficientSimilarity; import org.apache.mahout.cf.taste.model.DataModel; import org.apache.mahout.cf.taste.neighborhood.UserNeighborhood; import org.apache.mahout.cf.taste.recommender.RecommendedItem; import org.apache.mahout.cf.taste.recommender.Recommender; import org.apache.mahout.cf.taste.similarity.ItemSimilarity; import org.apache.mahout.cf.taste.similarity.UserSimilarity; public class RecommenderIntro { //下面是一個基於使用者的簡單的推薦 //探究使用者與使用者之間的相似性,簡單的說就是你有一個好基友,他喜歡這首歌,那麼你喜歡這首歌的可能性很大。 public static void main(String[] args) throws TasteException, Exception { try { DataModel model=new FileDataModel(new File("/home/xuyao/mahout/test_data/intro.csv")); //UserSimilarity封裝了使用者間相似性的概念 UserSimilarity similarity=new PearsonCorrelationSimilarity(model); //UserNeighborhood封裝了最相似使用者組的概念. 2是使用者的鄰域,指的是最相似的幾個使用者 UserNeighborhood neighborhood=new NearestNUserNeighborhood(2,similarity,model); //Recommender推薦引擎 Recommender recommender=new GenericUserBasedRecommender(model,neighborhood,similarity); List<RecommendedItem> recommendations=recommender.recommend(1,1); for(RecommendedItem recommendation : recommendations) System.out.println(recommendation); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } evaluator(); } //配置並評估一個推薦程式,這裡也是基於使用者的推薦 public static void evaluator() throws IOException, TasteException{ DataModel model=new FileDataModel(new File("/home/xuyao/mahout/ua.base")); RecommenderEvaluator evaluator=new AverageAbsoluteDifferenceRecommenderEvaluator(); RecommenderBuilder builder =new RecommenderBuilder() { public Recommender buildRecommender(DataModel model) throws TasteException { //PearsonCorrelationSimilarity:相似性度量標準--皮爾遜相關係數 UserSimilarity similarity=new PearsonCorrelationSimilarity(model); //EuclideanDistanceSimilarity: 相似性度量標準--歐式距離 UserSimilarity similarity_2=new EuclideanDistanceSimilarity(model); //TanimotoCoefficientSimilarity: 相似性度量標準--谷本系數--完全拋開偏好值 UserSimilarity similarity_3=new 
TanimotoCoefficientSimilarity(model); //NearestNUserNeighborhood :固定大小的鄰域。。改變這個100可以得到不同的打分,所以這個是可以用來調優的 UserNeighborhood neighborhood=new NearestNUserNeighborhood(100,similarity,model); //下面是另一個表示鄰域的,用的是基於閾值的鄰域。。其中0.5為可調優。 UserNeighborhood neighborhood_2=new ThresholdUserNeighborhood(0.5, similarity, model); return new GenericUserBasedRecommender(model, neighborhood, similarity); } }; //0.9指的是訓練90%的資料,測試10%的資料。 而1.0指的是輸入的資料的比例。 這裡表示資料集全部輸入,其中90%用來訓練,另外10%用來測試。 double socre =evaluator.evaluate(builder, null, model, 0.9, 1.0); //這個socre表示這個模型的打分,分數越小表示這個模型越好。 System.out.println(socre); } //下面是基於物品的推薦,簡單的說就是你的電腦有360安全衛士,360防毒,360瀏覽器,於是說你比較喜歡360的產品,就給你推薦360WIFI。 public static void evaluator_2() throws IOException{ DataModel model=new FileDataModel(new File("/home/xuyao/mahout/ua.base")); RecommenderBuilder builder =new RecommenderBuilder() { public Recommender buildRecommender(DataModel model) throws TasteException { ItemSimilarity similarity =new PearsonCorrelationSimilarity(model); return new GenericItemBasedRecommender(model, similarity); } }; } }
1,101,5  

1,102,3  

1,103,2.5  

2,101,2  

2,102,2.5  

2,103,5  

2,104,2  

3,101,2.5  

3,104,4  

3,105,4.5  

3,107,5  

4,101,5  

4,103,3  

4,104,4.5  

4,106,4  

5,101,4  

5,102,3  

5,103,2  

5,104,4  

5,105,3.5  

5,106,4