Spark MLlib(七)用Spark實現LogisticRegression
阿新 • • 發佈:2018-11-13
Logistic迴歸又稱Logistic迴歸分析,是一種廣義的線性迴歸分析模型,常用於資料探勘、疾病自動診斷、經濟預測等領域。以下是Spark MLlib中該演算法的實現方式,原始範例出自官方文件:http://spark.apache.org/docs/latest/mllib-linear-methods.html#classification
package alg
import org.apache.spark.{SparkConf, SparkContext}
import org.apache.spark.mllib.classification.LogisticRegressionWithLBFGS
import org.apache.spark.mllib.evaluation.MulticlassMetrics
import org.apache.spark.mllib.regression.LabeledPoint
import org.apache.spark.mllib.util.MLUtils
/**
 * Trains a logistic-regression classifier with L-BFGS on a LIBSVM-format data
 * set, prints its accuracy on a held-out split, and saves the fitted model.
 */
object logisticRegression {

  /**
   * Runs the whole pipeline: load, split, train, evaluate, save.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf().setMaster("local").setAppName("testTansformition")
    val sc = new SparkContext(sparkConf)

    // Always release the SparkContext, even when a step below throws.
    try {
      // 1. Load the data set (LIBSVM text format).
      val data = MLUtils.loadLibSVMFile(sc, "data/mllib/sample_libsvm_data.txt")

      // 2. Split into 60% training / 40% test; the fixed seed keeps the split reproducible.
      val splitData = data.randomSplit(Array(0.6, 0.4), seed = 11L)
      // The training set is cached because it is handed to the optimizer below.
      val training = splitData(0).cache()
      val test = splitData(1)

      // 3. Train the model, configured for up to 10 classes.
      val model = new LogisticRegressionWithLBFGS()
        .setNumClasses(10)
        .run(training)

      // 4. Evaluate on the test set: pair each prediction with its true label.
      val predictionAndLabels = test.map { case LabeledPoint(label, features) =>
        (model.predict(features), label)
      }
      val metrics = new MulticlassMetrics(predictionAndLabels)
      val accuracy = metrics.accuracy
      println(s"Accuracy=$accuracy")

      // 5. Persist the trained model.
      // NOTE(review): saving presumably fails if this path already exists from a
      // previous run -- confirm, and clear the directory beforehand if so.
      model.save(sc, "target/tmp/scalaLogisticRegressionWithLBFGSModel")
    } finally {
      sc.stop()
    }
  }
}