1. 程式人生 > >基於Spark的FPGrowth源碼中難啃的骨頭

基於Spark的FPGrowth源碼中難啃的骨頭

.get valid get transacti suffix ldr nsa orelse tor

  /** Extracts all patterns with valid suffix and minimum count. */
  def extract(
      minCount: Long,
      validateSuffix: T => Boolean = _ => true): Iterator[(List[T], Long)] = {
    // minCount:       an item is expanded only if its summary count is >= this value.
    // validateSuffix: predicate on the item; defaults to accepting everything.
    // Result:         a lazy iterator of (pattern, count) pairs.
    summaries.iterator.flatMap { case (item, summary) =>
      if (validateSuffix(item) && summary.count >= minCount) {
        // Emit the single-item pattern first, then every pattern found in the tree projected on
        // `item`, each one prefixed with `item`.
        //
        // The `++` must trail its left operand. If it starts the next line, Scala 2 infers a
        // statement end after `Iterator.single(...)` and the recursive part is no longer
        // concatenated to it.
        //
        // Note: the recursive call passes only `minCount`, so `validateSuffix` falls back to its
        // default there and the suffix check applies to the outermost item only.
        Iterator.single((item :: Nil, summary.count)) ++
          project(item).extract(minCount).map { case (t, c) =>
            (item :: t, c)
          }
      } else {
        Iterator.empty
      }
    }
  }

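分析:遍歷 summaries 中的每個項目;只有通過 validateSuffix 且 summary.count 不小於 minCount 的項目才會產生模式。對這樣的項目,先輸出只含它自己的模式 (item :: Nil, summary.count),再對 project(item) 的結果遞迴呼叫 extract(minCount)(project 的實現此處未列出),並把 item 接到每個子模式的最前面。注意遞迴呼叫時沒有傳入 validateSuffix,因此後綴檢查只作用於最外層的項目。整個結果是惰性的 Iterator。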

/** Adds a transaction with count. */
  def add(t: Iterable[T], count: Long = 1L): this.type = {
    // t:     the items of one transaction, inserted in iteration order.
    // count: how many times this transaction occurs; must be positive.
    // Returns this instance so calls can be chained.
    require(count > 0)
    var curr = root
    // Every transaction passes through the root, so it accumulates the count as well.
    curr.count += count
    t.foreach { item =>
      // Per-item bookkeeping: total count plus the list of nodes that carry this item.
      val summary = summaries.getOrElseUpdate(item, new Summary)
      summary.count += count
      // Reuse the existing child for this item, or create one and register it in the summary.
      val child = curr.children.getOrElseUpdate(item, {
        val newNode = new Node(curr)
        newNode.item = item
        summary.nodes += newNode
        newNode
      })
      child.count += count
      // Descend one level; the next item is inserted below this child.
      curr = child
    }
    this
  }

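分析:先用 require 保證 count 為正,然後從 root 出發並把 count 累加到 root 上。接著依序處理交易中的每個項目:從 summaries 取得(或新建)該項目的 Summary 並累加 count;再在當前節點的 children 中取得對應的子節點,若不存在則新建一個以當前節點為父節點的 Node,設定其 item 並登記到 summary.nodes 中;最後把 count 累加到子節點上,並把當前節點移到該子節點。方法返回 this,因此可以鏈式呼叫。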

 /** Returns all transactions under this node. */
  private def getTransactions(node: Node[T]): Iterator[(List[T], Long)] = {
    // Starts at the node's own count and is reduced by every transaction that continues into a
    // child. Whatever is left once the children are exhausted ended exactly at `node`.
    var remaining = node.count

    // Lazy: the subtraction below runs only as the resulting iterator is consumed.
    val viaChildren = node.children.iterator.flatMap { case (item, child) =>
      getTransactions(child).map { case (suffix, c) =>
        remaining -= c
        (item :: suffix, c)
      }
    }

    // `Iterator.++` takes its right-hand side by name, so `remaining` is inspected only after
    // `viaChildren` has been fully consumed, i.e. after all subtractions have happened.
    viaChildren ++ (if (remaining > 0) Iterator.single((Nil, remaining)) else Iterator.empty)
  }

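分析:count 的初值是節點自身的計數。遍歷每個子節點時遞迴取得其下的交易,每輸出一筆就從 count 中減去該筆的計數,並把子節點的 item 接到交易的最前面。關鍵在於 Iterator 的 ++ 對右側參數是惰性求值(by-name):只有當左側所有子節點的交易都迭代完之後,才會判斷 count > 0。此時剩下的 count 就是恰好在本節點結束的交易數,以 (Nil, count) 的形式輸出。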

基於Spark的FPGrowth源碼中難啃的骨頭