Lesson 76: Importing User Logs into Hive and Computing PV with Spark SQL
Contents:
1. Importing data into Hive
2. Working with the data using Spark SQL
1. Importing Data into Hive
create table userLogs(date String, timestamp bigint, userID bigint, pageID bigint, channel String, action String) row format delimited fields terminated by '\t' lines terminated by '\n';

load data local inpath '/home/hadoop/learnSpark/SparkSQLDataManually/userLogs.log' into table userLogs;

Note that the row format clause belongs on CREATE TABLE, not on LOAD DATA; the load statement only names the source file and the target table. In newer Hive versions, date and timestamp are reserved words and may need to be written with backticks.
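These two statements can also be issued from Spark itself through HiveContext, which keeps the table setup next to the job that uses it. The following is a minimal, self-contained sketch under that assumption: the class name is illustrative, and it assumes a reachable Hive metastore with the log file present on the driver host.

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.hive.HiveContext;

// Sketch: running the same Hive DDL and load from Spark instead of the Hive CLI.
public class CreateAndLoadUserLogs {
    public static void main(String[] args) {
        SparkConf conf = new SparkConf().setAppName("CreateAndLoadUserLogs");
        JavaSparkContext sc = new JavaSparkContext(conf);
        HiveContext hiveContext = new HiveContext(sc.sc());
        hiveContext.sql("use hive");
        // Backticks guard against date/timestamp being reserved words in newer Hive;
        // '\\t' and '\\n' keep the backslash escapes intact inside the HiveQL text.
        hiveContext.sql("create table if not exists userLogs(`date` String, `timestamp` bigint, "
                + "userID bigint, pageID bigint, channel String, action String) "
                + "row format delimited fields terminated by '\\t' lines terminated by '\\n'");
        hiveContext.sql("load data local inpath "
                + "'/home/hadoop/learnSpark/SparkSQLDataManually/userLogs.log' "
                + "into table userLogs");
        sc.stop();
    }
}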
2. Working with the Data Using Spark SQL
package SparkSQL;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.sql.hive.HiveContext;

import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;

/**
 * FileName: SparkSQLUserLogsOps
 * Author: hadoop
 * Email: [email protected]
 * Date: 18-11-12 10:19 PM
 * Description: reads the user logs from Hive and computes the top 10 pages
 * by PV (page views) for the date two days ago.
 */
public class SparkSQLUserLogsOps {
    public static void main(String[] args) {
        // Create a SparkConf to read system settings and name the application
        SparkConf conf = new SparkConf().setAppName("SparkSQLUserLogsOps").setMaster("spark://Master:7077");
        // Create the JavaSparkContext, the cornerstone of the entire Driver
        JavaSparkContext sc = new JavaSparkContext(conf);
        // Set the log output level
        sc.setLogLevel("INFO");
        // Create a HiveContext for running SQL against Hive tables
        HiveContext hiveContext = new HiveContext(sc.sc());
        String twoDaysAgo = getTwoDaysAgo();
        pvStatistic(hiveContext, twoDaysAgo);
    }

    private static void pvStatistic(HiveContext hiveContext, String twoDaysAgo) {
        hiveContext.sql("use hive");
        // Count 'view' actions per page on the target date, then keep the 10 pages
        // with the highest PV. The date must be concatenated into the SQL text;
        // quoting the variable name literally (date = 'twodaysago') matches nothing.
        String sqlText = "select date, pageID, pv "
                + "from (select date, pageID, count(*) pv from userlogs "
                + "where action = 'view' and date = '" + twoDaysAgo + "' "
                + "group by date, pageID) subquery "
                + "order by pv desc limit 10";
        hiveContext.sql(sqlText).show();
    }

    private static String getTwoDaysAgo() {
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        Calendar calendar = Calendar.getInstance();
        calendar.setTime(new Date());
        calendar.add(Calendar.DATE, -2);
        Date twoDaysAgo = calendar.getTime();
        return format.format(twoDaysAgo);
    }
}
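For comparison, the same top-10 PV query can be written with the DataFrame API instead of a concatenated SQL string, which sidesteps the quoting pitfall noted above. The sketch below targets the Spark 1.x Java API used in this lesson; the table and column names come from the Hive table created in Part 1, and the class name is illustrative.

import org.apache.spark.sql.DataFrame;
import org.apache.spark.sql.hive.HiveContext;
import static org.apache.spark.sql.functions.col;

// Sketch: top-10 pages by PV via the DataFrame API (Spark 1.x),
// equivalent to the SQL string in pvStatistic above.
public class PvStatisticDataFrame {
    public static void pvStatistic(HiveContext hiveContext, String twoDaysAgo) {
        hiveContext.sql("use hive");
        DataFrame logs = hiveContext.table("userlogs");
        logs.filter(col("action").equalTo("view").and(col("date").equalTo(twoDaysAgo)))
            .groupBy("date", "pageID")        // one row per (date, pageID)
            .count()                          // adds a 'count' column
            .withColumnRenamed("count", "pv")
            .orderBy(col("pv").desc())
            .limit(10)
            .show();
    }
}

As a side note, hard-coding setMaster("spark://Master:7077") ties the job to one specific cluster; it is common to leave the master unset in code and supply it with spark-submit's --master flag instead.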