Hive 之 Load data into table [partition]（HDFS → ODS，ODS → DW）
阿新 • 發佈:2021-01-25
技術標籤:Hive
Hive-之Load data into table[partition]
1 從HDFS到ODS層
-- ODS-layer table: raw student records landed from HDFS.
-- The LZO input format lets Hive read .lzo-compressed text files;
-- an index (built separately with DistributedLzoIndexer) makes them splittable.
DROP TABLE IF EXISTS shufang.students;
CREATE TABLE IF NOT EXISTS shufang.students (
    id          INT,
    name        STRING,
    create_time STRING
)
PARTITIONED BY (dt STRING)                       -- one partition per load date
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'   -- tab-separated source files
STORED AS
    INPUTFORMAT  'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/user/hive/warehouse/shufang.db/students';   -- explicit HDFS storage path
-- Import data: LOAD DATA runs no MapReduce job — it only moves the files
-- from the given HDFS directory into the target partition directory.
LOAD DATA INPATH '/origin_data/db/shufang/students/2021-01-18' INTO TABLE shufang.students PARTITION(dt = '2021-01-18');
--如果是flume過來的日誌資料,由於只做了壓縮,還不支援切片,所以我們需要load之後將資料建立索引支援切片
# Build LZO indexes for the freshly loaded partition so its .lzo files
# become splittable by MapReduce (plain LZO compression alone is not splittable).
# Fixes vs. original: a space is required before each line-continuation
# backslash (otherwise the class name and the path fuse into one token),
# and the HDFS path must not contain an embedded space.
hadoop jar /opt/module/hadoop-2.7.7/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar \
  com.hadoop.compression.lzo.DistributedLzoIndexer \
  /user/hive/warehouse/shufang.db/students/dt=2021-01-18
2 從ODS到DWD
-- DWD-layer table: columnar Parquet storage (splittable by design),
-- with LZO compression requested through table properties.
CREATE TABLE IF NOT EXISTS student1 (
    id          INT,
    name        STRING,
    create_time STRING
)
COMMENT 'parquet store table,parquet is born to support split'
PARTITIONED BY (dt STRING)       -- partition key
STORED AS parquet                -- still an inputformat/outputformat pair underneath
LOCATION '/user/hive/warehouse/shufang.db/student1'           -- explicit storage path
TBLPROPERTIES ('parquet.compression' = 'lzo');                -- Parquet codec choice
-- Rewrite one day of ODS data into the Parquet DWD table
-- (OVERWRITE replaces the whole target partition, so the step is idempotent).
INSERT OVERWRITE TABLE student1 PARTITION (dt = '2021-01-18')
SELECT id,
       name,
       create_time
FROM students
WHERE dt = '2021-01-18';