Hadoop Study -- Hive-Based Hadoop Log Analysis
阿新 • Published: 2019-02-03
This article loads local Hadoop logs into a Hive data warehouse, filters out the useful log entries, and stores them into a MySQL database.
Environment: Hive 0.12.0 + Hadoop 1.2.1
1. Log format
2014-04-17 22:53:30,621 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_4856124673669777083 to 127.0.0.1:50010
2014-04-17 22:53:30,621 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_3952951948255939678 to 127.0.0.1:50010
2014-04-17 22:53:30,629 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_-7659295115271387204 to 127.0.0.1:50010
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_1247712269990837159 to 127.0.0.1:50010
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_3041853621376201540 to 127.0.0.1:50010
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_5165085626508844330 to 127.0.0.1:50010
2014-04-17 22:53:30,713 INFO org.apache.hadoop.hdfs.StateChange: BLOCK* addToInvalidates: blk_5058515618805863349 to 127.0.0.1:50010
2. Code
Table schema definition:
// Create the Hive table used to store the log entries
HiveUtil.createTable("create table if not exists loginfo11 ( rdate String,time ARRAY<string>,type STRING,relateclass STRING, information1 STRING,information2 STRING,information3 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'");
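Given FIELDS TERMINATED BY ' ' and COLLECTION ITEMS TERMINATED BY ',', the first sample log line above maps to the columns as follows (my reading of the DDL; Hive simply drops fields beyond the declared columns):

rdate        = 2014-04-17
time         = ["22:53:30", "621"]   (the comma in the timestamp splits it into array items, which is why time is declared ARRAY<string>; time[0] recovers the clock time)
type         = INFO
relateclass  = org.apache.hadoop.hdfs.StateChange:
information1 = BLOCK*
information2 = addToInvalidates:
information3 = blk_4856124673669777083

The trailing "to 127.0.0.1:50010" has no matching column and is discarded.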
The getConnect.java class is responsible for creating and closing the connections to Hive and MySQL:
//package com.my.hivetest;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.SQLException;

public class getConnect {
    private static Connection conn = null;
    private static Connection conntomysql = null;

    private getConnect() {
    }

    // Lazily create a single connection to the Hive server
    public static Connection getHiveConn() throws SQLException {
        if (conn == null) {
            try {
                Class.forName("org.apache.hadoop.hive.jdbc.HiveDriver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            conn = DriverManager.getConnection(
                    "jdbc:hive://localhost:50031/default", "", "");
            System.out.println("Hive connection established");
        }
        return conn;
    }

    // Lazily create a single connection to MySQL
    public static Connection getMysqlConn() throws SQLException {
        if (conntomysql == null) {
            try {
                Class.forName("com.mysql.jdbc.Driver");
            } catch (ClassNotFoundException e) {
                e.printStackTrace();
                System.exit(1);
            }
            conntomysql = DriverManager.getConnection(
                    "jdbc:mysql://localhost:3306/hadoop?createDatabaseIfNotExist=true&useUnicode=true&characterEncoding=GBK",
                    "root", "123456");
            System.out.println("MySQL connection established");
        }
        return conntomysql;
    }

    public static void closeHive() throws SQLException {
        if (conn != null)
            conn.close();
    }

    public static void closemysql() throws SQLException {
        if (conntomysql != null)
            conntomysql.close();
    }
}
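The class follows a simple lazy-singleton pattern: each backend gets one static Connection that is created on first use and shared by every caller, and closeHive()/closemysql() release the connections at the end of the run.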
The HiveUtil.java class provides methods to create the Hive table, load data, and query data by condition, as well as a method to transfer the results into MySQL.
//package com.my.hivetest;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class HiveUtil {
//create the Hive table
public static void createTable(String hiveql) throws SQLException{
Connection con=getConnect.getHiveConn();
Statement stmt = con.createStatement();
ResultSet res = stmt.executeQuery(hiveql);
}
//query the Hive table
public static ResultSet queryHive(String hiveql) throws SQLException{
Connection con=getConnect.getHiveConn();
Statement stmt = con.createStatement();
ResultSet res = stmt.executeQuery(hiveql);
return res;
}
//load data
public static void loadDate(String hiveql) throws SQLException{
Connection con=getConnect.getHiveConn();
Statement stmt = con.createStatement();
ResultSet res = stmt.executeQuery(hiveql);
}
//transfer the results to MySQL
public static void hiveTomysql(ResultSet Hiveres) throws SQLException{
Connection con=getConnect.getMysqlConn();
Statement stmt = con.createStatement();
while (Hiveres.next()) {
String rdate=Hiveres.getString(1);
String time=Hiveres.getString(2);
String type=Hiveres.getString(3);
String relateclass=Hiveres.getString(4);
String information=Hiveres.getString(5)+Hiveres.getString(6)+Hiveres.getString(7);//combine the message fields
System.out.println(rdate+" "+time+" "+type+" "+relateclass+" "+information+" ");
int i = stmt.executeUpdate(
"insert into hadooplog values(0,'"+rdate+"','"+time+"','"+type+"','"+relateclass+"','"+information+"')");
}
}
}
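The concatenated INSERT in hiveTomysql works for this demo, but it breaks if a log message contains a single quote and is open to SQL injection. Below is a minimal sketch of the same transfer using a PreparedStatement; this is a hypothetical variant, not part of the original code, it assumes the hadooplog schema sketched at the end of this article and additionally needs import java.sql.PreparedStatement.

public static void hiveTomysqlSafe(ResultSet hiveRes) throws SQLException {
    Connection con = getConnect.getMysqlConn();
    // Placeholders let the driver escape the values, so quotes in log text are safe
    PreparedStatement ps = con.prepareStatement(
            "insert into hadooplog values(0, ?, ?, ?, ?, ?)");
    while (hiveRes.next()) {
        // Same field combination as hiveTomysql above
        String information = hiveRes.getString(5) + hiveRes.getString(6) + hiveRes.getString(7);
        ps.setString(1, hiveRes.getString(1)); // rdate
        ps.setString(2, hiveRes.getString(2)); // time[0]
        ps.setString(3, hiveRes.getString(3)); // type
        ps.setString(4, hiveRes.getString(4)); // relateclass
        ps.setString(5, information);
        ps.executeUpdate();
    }
    ps.close();
}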
The exeHiveQL.java class is the driver class and implements the main function.
//package com.my.hivetest;
import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.sql.Statement;
public class exeHiveQL {
public static void main(String[] args) throws SQLException {
if (args.length < 2) {
System.out.print("請輸入查詢條件: 日誌級別 日期");
System.exit(1);
}
String type = args[0];
String date = args[1];
//create the table in Hive
HiveUtil.createTable(
    "create table if not exists loginfo11 "
    + "( rdate String,time ARRAY<string>,type STRING, "
    + "relateclass STRING,information1 STRING,information2 STRING, "
    + "information3 STRING) ROW FORMAT DELIMITED FIELDS TERMINATED BY ' ' "
    + "COLLECTION ITEMS TERMINATED BY ',' MAP KEYS TERMINATED BY ':'");
//load the Hadoop logs
HiveUtil.loadDate("load data local inpath '/root/hadoop-1.2.1/logs/*.log.*' overwrite into table loginfo11");
//query the useful entries
//test code: print the query string before running it
String str = "select rdate,time[0],type,relateclass,information1,information2,information3 from loginfo11 where type='"
    + type + "' and rdate='" + date + "' ";
System.out.println(str + "----test");
ResultSet res1 = HiveUtil.queryHive(str);
//store the query results into MySQL
HiveUtil.hiveTomysql(res1);
//close the Hive connection
getConnect.closeHive();
//close the MySQL connection
getConnect.closemysql();
}
}
Before running, you need to start the Hive server service; the port number 50031 here must match the one used in the getConnect.java class:
# bin/hive --service hiveserver -p 50031

Then run it in Eclipse, setting the input arguments:

ERROR 2014-04-14

Before running, you also need to import the required JARs:
hive-jdbc-0.12.0.jar
hive-metastore-0.12.0.jar
as well as all the JARs under hive/lib (to save trouble, I imported them all).
One more thing: you need to create the database hadoop in MySQL in advance and create the table hadooplog inside it. The table's schema just needs to match what the code expects; a possible schema is sketched below.
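As a hedged example, a schema consistent with the INSERT in hiveTomysql (which writes a literal 0 into the first column, suggesting an auto-increment id) might look like this; the column names and sizes are assumptions, only the column order and count are dictated by the INSERT:

-- Hypothetical hadooplog schema matching "insert into hadooplog values(0, ...)"
CREATE DATABASE IF NOT EXISTS hadoop;
USE hadoop;
CREATE TABLE IF NOT EXISTS hadooplog (
    id INT AUTO_INCREMENT PRIMARY KEY,  -- the INSERT passes 0, so MySQL generates the id
    rdate VARCHAR(32),
    time VARCHAR(32),
    type VARCHAR(16),
    relateclass VARCHAR(255),
    information VARCHAR(1024)
);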
References:
《Hadoop:開啟通向雲端計算的捷徑》, 劉鵬 (Hadoop: Opening a Shortcut to Cloud Computing, Liu Peng)