Spark MLlib(一)k-means
阿新 • • 發佈:2018-12-20
Spark 官網給出的 k-means 實現方式,原地址:http://spark.apache.org/docs/latest/ml-clustering.html
package alg
import org.apache.spark.ml.clustering.KMeans
import org.apache.spark.ml.evaluation.ClusteringEvaluator
import org.apache.spark.sql.SparkSession
/**
 * Stand-alone k-means clustering example built on Spark ML.
 *
 * Loads a libsvm-formatted dataset, fits a two-cluster `KMeans` model,
 * scores the resulting assignment with `ClusteringEvaluator`, and prints the
 * score together with the learned cluster centers.
 */
object k_means {

  /**
   * Program entry point.
   *
   * @param args command-line arguments; not used by this example
   */
  def main(args: Array[String]): Unit = {
    // Local session that uses every available core on this machine.
    val spark: SparkSession = SparkSession.builder
      .appName("My")
      .master("local[*]")
      .getOrCreate()

    // Everything that touches the session runs inside try/finally so the
    // session is released even if loading, fitting or evaluating throws.
    try {
      // Loads data.
      val dataset = spark.read.format("libsvm").load("data/mllib/sample_kmeans_data.txt")

      // Trains a k-means model; the fixed seed keeps runs repeatable.
      val kmeans = new KMeans().setK(2).setSeed(1L)
      val model = kmeans.fit(dataset)

      // Make predictions
      val predictions = model.transform(dataset)

      // Evaluate clustering by computing Silhouette score
      val evaluator = new ClusteringEvaluator()
      val silhouette = evaluator.evaluate(predictions)
      println(s"Silhouette with squared euclidean distance = $silhouette" )

      // Shows the result.
      println("Cluster Centers: ")
      model.clusterCenters.foreach(println)
    } finally {
      // Shut down the underlying SparkContext and free its resources.
      spark.stop()
    }
  }
}