用 Spark 分析北京積分落戶資料：按使用者分數分析
阿新 • • 發佈:2019-01-22
按使用者分數分析
# Score-distribution analysis of the Beijing points-based household
# registration (積分落戶) roster.
#
# Assumes `spark` is an existing SparkSession (as predefined by the PySpark
# shell or a notebook) -- confirm for your environment.

# Load the roster CSV. The built-in CSV reader is used instead of the legacy
# 'com.databricks.spark.csv' format name; the first row is the header and
# column types are inferred from the data.
df = spark.read.csv('jifenluohu.csv', header=True, inferSchema=True)
# print(df)

# Register the DataFrame as a temporary view so it can be queried with SQL.
df.createOrReplaceTempView("jflh")
# df.show()

# --- Analysis by score ---

# Number of applicants per score, highest scores first (top 30 rows).
spark.sql(
    "select score,count(*) as num from jflh group by score order by score desc"
).show(30)
# Output:
# +------+---+
# | score|num|
# +------+---+
# |122.59|  1|
# |121.25|  1|
# |118.96|  1|
# |118.21|  1|
# |117.79|  1|
# |117.34|  1|
# |116.17|  1|
# |116.13|  1|
# |115.95|  1|
# |115.91|  1|
# |115.45|  1|
# |115.29|  1|
# |115.25|  1|
# |115.21|  1|
# |115.13|  1|
# |114.88|  1|
# | 114.5|  1|
# |114.42|  1|
# |113.67|  2|
# |113.45|  1|
# |113.25|  1|
# |113.09|  1|
# |112.66|  1|
# |112.58|  1|
# |112.25|  1|
# |112.17|  1|
# |112.05|  1|
# |111.79|  1|
# |111.75|  1|
# | 111.7|  1|
# +------+---+
# only showing top 30 rows

# Number of applicants per score, most common scores first (top 60 rows).
# Rows with equal counts have no tie-breaker in the query, so their relative
# order is whatever Spark happens to return.
spark.sql(
    "select score,count(*) as num from jflh group by score order by num desc"
).show(60)
# Output:
# +-----+---+
# |score|num|
# +-----+---+
# | 91.0| 51|
# | 93.0| 49|
# |90.96| 47|
# | 92.0| 44|
# |91.25| 41|
# |91.96| 41|
# | 94.0| 38|
# |92.46| 38|
# |91.88| 36|
# |90.79| 36|
# |91.67| 35|
# | 91.5| 35|
# |92.29| 33|
# | 94.5| 33|
# |91.38| 33|
# |93.21| 33|
# |92.17| 33|
# |90.83| 33|
# |92.71| 33|
# |91.33| 31|
# |91.17| 31|
# |93.63| 31|
# |91.75| 31|
# |90.75| 30|
# |96.21| 30|
# |91.29| 30|
# |93.96| 30|
# |92.96| 30|
# |91.21| 29|
# |92.67| 29|
# |91.46| 29|
# |91.08| 28|
# |94.46| 28|
# |92.21| 28|
# |91.71| 28|
# |90.92| 28|
# |93.46| 28|
# |91.54| 28|
# | 92.5| 28|
# |92.54| 27|
# |92.08| 27|
# |94.33| 27|
# |93.25| 27|
# |93.33| 27|
# |93.71| 27|
# |92.33| 27|
# |95.79| 26|
# |92.92| 26|
# |92.63| 26|
# |92.75| 26|
# |92.04| 26|
# |92.25| 26|
# |95.21| 26|
# |91.58| 25|
# |92.13| 25|
# |91.79| 25|
# | 95.0| 25|
# |93.75| 24|
# |94.96| 24|
# |92.58| 24|
# +-----+---+
# only showing top 60 rows