1. 程式人生 > 其它 >資料探勘FPGrowth演算法JAVA實現

資料探勘FPGrowth演算法JAVA實現

技術標籤:java、演算法、FPGrowth、資料探勘、關聯規則

實驗結果

在這裡插入圖片描述
在最小支援度為 0.001 的條件下可以得到準確結果,耗時僅 1.6 s。
本實現最低可以完成最小支援度為 0.0003 的計算。

程式碼

package com.company;

import java.io.*;
import java.util.*;

/**
 * A node of an FP-tree.
 *
 * <p>Besides the usual parent/children links, every node carries a {@code brother} link that
 * chains together all nodes holding the same item name. The chain starts at a dummy node stored
 * in {@link FPTree#headTable}.
 */
class FPNode {
    String name;
    int count = 0;
    FPNode brother = null, parent = null;
    ArrayList<FPNode> children = new ArrayList<>();

    /** Creates a node with count 0 and no parent (used for the root and for head-table entries). */
    FPNode(String name) {
        this.name = name;
    }

    /** Creates a tree node for {@code name} below {@code parent} with the given initial count. */
    FPNode(String name, FPNode parent, int count) {
        this.name = name;
        this.parent = parent;
        this.count = count;
    }
}

/** An item name paired with the support it has in the database the tree is built from. */
class Item {
    String name;
    int support;

    Item(String name, int support) {
        this.name = name;
        this.support = support;
    }
}

/**
 * One transaction, reduced to its frequent items and put into the insertion order of the tree.
 */
class Trade {
    ArrayList<Item> items = new ArrayList<>();

    /**
     * Keeps only the entries of {@code items} that are keys of {@code frequentItems}, drops
     * repeated occurrences of the same item, and sorts the result by support (descending).
     *
     * @param items         raw item names of one transaction
     * @param frequentItems item name to support, for every frequent item
     */
    public Trade(List<String> items, Map<String, Integer> frequentItems) {
        // An item may occur only once on a tree path, so repeated names are skipped.
        Set<String> seen = new HashSet<>();
        for (String item : items) {
            Integer support = frequentItems.get(item);
            if (support != null && seen.add(item)) {
                this.items.add(new Item(item, support));
            }
        }
        // Sort by support, descending. Ties are broken by name so that every transaction uses
        // the same total order; without the tie-break, equal-support items keep their input
        // order and the same item pair can end up on tree paths in both orders, which makes
        // the mined supports too small.
        this.items.sort((l, r) -> {
            int bySupport = Integer.compare(r.support, l.support);
            return bySupport != 0 ? bySupport : l.name.compareTo(r.name);
        });
    }
}

/**
 * FP-tree built from a transaction database together with its head table.
 */
class FPTree {
    int minSupport;
    FPNode root = new FPNode("root");
    Map<String, Integer> frequentItems = new HashMap<>();
    Map<String, FPNode> headTable = new HashMap<>();
    // For each item, the last node appended to that item's brother chain.
    Map<String, FPNode> currentPosition = new HashMap<>();
    List<List<String>> database;

    /**
     * Builds the tree: computes the frequent items, creates the head table, then inserts every
     * transaction.
     *
     * @param database   transactions, each a list of item names
     * @param minSupport absolute support threshold (number of transactions)
     */
    public FPTree(List<List<String>> database, int minSupport) {
        this.minSupport = minSupport;
        this.database = database;
        getFrequentItems();
        buildHeadTable();
        buildTree();
    }

    /**
     * Computes the support of every item and stores those with support of at least
     * {@code minSupport} in {@code frequentItems}. An item is counted once per transaction,
     * even if it is listed several times in it.
     */
    public void getFrequentItems() {
        Map<String, Integer> supportCount = new HashMap<>();
        Set<String> seenInLine = new HashSet<>();
        for (List<String> line : database) {
            seenInLine.clear();
            for (String item : line) {
                if (seenInLine.add(item)) {
                    supportCount.merge(item, 1, Integer::sum);
                }
            }
        }
        supportCount.forEach((item, support) -> {
            if (support >= minSupport) {
                frequentItems.put(item, support);
            }
        });
    }

    /** Builds the head table: one dummy chain head per frequent item. */
    public void buildHeadTable() {
        for (String frequentItem : frequentItems.keySet()) {
            FPNode head = new FPNode(frequentItem);
            headTable.put(frequentItem, head);
            currentPosition.put(frequentItem, head);
        }
    }

    /** Builds the FP-tree by inserting every transaction of the database below the root. */
    public void buildTree() {
        for (List<String> items : database) {
            insertTree(new Trade(items, frequentItems), root);
        }
    }

    /**
     * Inserts the items of {@code trade}, in order, as a path starting below {@code fpNode}.
     * Existing child nodes are reused and their count incremented; missing ones are created
     * with count 1 and appended to the item's brother chain.
     *
     * <p>The transaction is consumed: {@code trade.items} is empty when this method returns.
     *
     * @param trade  the sorted transaction to insert
     * @param fpNode the node below which the path starts
     * @return always 0
     */
    public int insertTree(Trade trade, FPNode fpNode) {
        FPNode current = fpNode;
        for (Item item : trade.items) {
            // Check whether the item already exists among the children.
            FPNode next = null;
            for (FPNode child : current.children) {
                if (child.name.equals(item.name)) {
                    next = child;
                    break;
                }
            }
            if (next != null) {
                next.count++;
            } else {
                next = new FPNode(item.name, current, 1);
                current.children.add(next);
                currentPosition.get(item.name).brother = next;
                currentPosition.put(item.name, next);
            }
            current = next;
        }
        // The walk is iterative (no recursion, no remove(0)); the list is still emptied
        // afterwards so that callers observe a consumed transaction.
        trade.items.clear();
        return 0;
    }
}

/** A mined frequent itemset together with its support. */
class FrequentItem {
    List<String> items;
    int support;

    public FrequentItem(List<String> items, int support) {
        this.items = items;
        this.support = support;
    }
}

/**
 * FP-Growth miner: recursively builds conditional FP-trees and collects every frequent itemset
 * in {@code patternList}.
 */
class FPGrowth {
    int minSupport;
    List<FrequentItem> patternList = new ArrayList<>();

    /**
     * Mines all frequent itemsets of {@code fpTree}.
     *
     * @param fpTree     the FP-tree of the full database
     * @param minSupport absolute support threshold used for the conditional trees
     */
    public FPGrowth(FPTree fpTree, int minSupport) {
        this.minSupport = minSupport;
        fpGrowth(fpTree, null);
    }

    /**
     * Emits one pattern per frequent item of {@code fpTree} (the item prepended to
     * {@code suffix}) and recurses into that item's conditional FP-tree.
     *
     * @param fpTree the (conditional) tree to mine
     * @param suffix items the tree is conditioned on; {@code null} or empty at the top level
     */
    private void fpGrowth(FPTree fpTree, List<String> suffix) {
        if (fpTree.root.children.isEmpty()) {
            return;
        }
        for (Map.Entry<String, Integer> entry : fpTree.frequentItems.entrySet()) {
            String frequentItem = entry.getKey();
            List<String> newSuffix = new ArrayList<>();
            newSuffix.add(frequentItem);
            if (suffix != null && !suffix.isEmpty()) {
                newSuffix.addAll(suffix);
            }
            patternList.add(new FrequentItem(newSuffix, entry.getValue()));
            // Generate the conditional pattern base.
            List<List<String>> conditionalPatternDatabase =
                    generateConditionalPatternDatabase(fpTree, frequentItem);
            // Generate the conditional FP-tree and mine it.
            FPTree conditionalFPTree = new FPTree(conditionalPatternDatabase, minSupport);
            fpGrowth(conditionalFPTree, newSuffix);
        }
    }

    /**
     * Collects the prefix paths of every node holding {@code frequentItem}; each path is added
     * as many times as the node's count.
     */
    private List<List<String>> generateConditionalPatternDatabase(FPTree fpTree, String frequentItem) {
        List<List<String>> conditionalPatternDatabase = new ArrayList<>();
        FPNode headNode = fpTree.headTable.get(frequentItem);
        for (FPNode fpNode = headNode.brother; fpNode != null; fpNode = fpNode.brother) {
            // Generate the prefix path (walk up to, but excluding, the root).
            List<String> prefixPath = new ArrayList<>();
            for (FPNode ancestor = fpNode.parent; ancestor.parent != null; ancestor = ancestor.parent) {
                prefixPath.add(ancestor.name);
            }
            // An empty path adds no item counts and no tree nodes, so it is skipped.
            if (prefixPath.isEmpty()) {
                continue;
            }
            for (int i = 0; i < fpNode.count; i++) {
                conditionalPatternDatabase.add(prefixPath);
            }
        }
        return conditionalPatternDatabase;
    }

    /** Returns the mined patterns, sorted in place by support in descending order. */
    public List<FrequentItem> getPatternList() {
        patternList.sort((l, r) -> Integer.compare(r.support, l.support));
        return patternList;
    }
}

/**
 * Command-line entry point: reads the relative minimum support from standard input, mines
 * {@code retail.dat} and prints every frequent itemset, the total count and the elapsed time.
 */
public class Main {
    private static final List<List<String>> database = new ArrayList<>();

    public static void main(String[] args) throws IOException {
        Scanner scanner = new Scanner(System.in);
        double minSupport = scanner.nextDouble();
        long startTime = System.currentTimeMillis();
        loadData();
        // Convert the relative threshold into an absolute transaction count, once.
        int threshold = (int) Math.ceil(minSupport * database.size());
        FPTree fpTree = new FPTree(database, threshold);
        FPGrowth fpGrowth = new FPGrowth(fpTree, threshold);
        int count = 0;
        for (FrequentItem frequentItem : fpGrowth.getPatternList()) {
            System.out.println(frequentItem.items + ": " + frequentItem.support);
            count++;
        }
        System.out.println("總數: " + count);
        long endTime = System.currentTimeMillis();
        System.out.println("程式執行時間:" + (endTime - startTime) + "ms");
    }

    /**
     * Reads {@code retail.dat}: one transaction per line, items separated by whitespace.
     * Blank lines are skipped so that no empty-string item enters the database.
     *
     * @throws IOException if the file cannot be opened or read
     */
    private static void loadData() throws IOException {
        try (BufferedReader bufferedReader = new BufferedReader(new FileReader("retail.dat"))) {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                String trimmed = line.trim();
                if (trimmed.isEmpty()) {
                    continue;
                }
                database.add(Arrays.asList(trimmed.split("\\s+")));
            }
        }
    }
}