1. 程式人生 > 其它 > Hive-之Load data into table[partition](hdfs -> ods ,ods -> dw)

Hive-之Load data into table[partition](hdfs -> ods ,ods -> dw)

技術標籤:Hive

Hive-之Load data into table[partition]

1 從HDFS到ODS層

-- Create the ODS table: fix schema, field delimiter, and storage formats.
DROP TABLE IF EXISTS shufang.students;
CREATE TABLE IF NOT EXISTS shufang.students(
    id          int,
    name        string,
    create_time string
)
PARTITIONED BY (dt string)                -- partitioned table, one partition per day
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'  -- tab-separated source files
STORED AS
    -- LZO-compressed text input; the "Deprecated" class is the mapred-API shim
    INPUTFORMAT  'com.hadoop.mapred.DeprecatedLzoTextInputFormat'
    OUTPUTFORMAT 'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION '/user/hive/warehouse/shufang.db/students';  -- explicit HDFS path for the table

-- Load data into a partition: no MapReduce job runs, Hive simply moves the
-- files from the source directory into the partition directory.
LOAD DATA INPATH '/origin_data/db/shufang/students/2021-01-18'
INTO TABLE shufang.students PARTITION(dt = '2021-01-18');

-- Log data collected by Flume is LZO-compressed but not yet splittable;
-- after loading, build an LZO index over the partition so MR can split it.
-- (Run in the shell, not in Hive.)
hadoop jar /opt/module/hadoop-2.7.7/share/hadoop/common/hadoop-lzo-0.4.21-SNAPSHOT.jar \
    com.hadoop.compression.lzo.DistributedLzoIndexer \
    /user/hive/warehouse/shufang.db/students/dt=2021-01-18

2 從ODS到DWD

-- DWD-layer table: Parquet is natively splittable, so no index step is needed.
CREATE TABLE IF NOT EXISTS student1(
    id          int,
    name        string,
    create_time string
)
COMMENT 'parquet store table,parquet is born to support split'
PARTITIONED BY (dt string)                            -- partition key
STORED AS parquet                                     -- still an input/output format pair under the hood
LOCATION '/user/hive/warehouse/shufang.db/student1'   -- explicit HDFS path for the table
TBLPROPERTIES('parquet.compression' = 'lzo');         -- compression codec for the Parquet files



-- Rewrite the target partition from the ODS table for the same day.
INSERT OVERWRITE TABLE student1 PARTITION(dt = '2021-01-18')
SELECT
    id,
    name,
    create_time
FROM students
WHERE dt = '2021-01-18';