數據定義和描述
阿新 • • 發佈:2018-10-22
標籤(Tags): ace, employee, comment, ram, internal, osi, 自動創建, ted, pop
-- Create a managed table demonstrating Hive complex types (array, struct, map).
create table employee (
    name string,
    work_place array<string>,                   -- access style: array_name[0]
    gender_age struct<gender:string, age:int>,  -- struct<col_name:type, ...>; similar to an HBase column family; access style: struct_name.col_name
    skills_score map<string, int>,              -- access style: map_name[key]
    apart_title map<string, array<string>>
)
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":";

-- !table employee    -- beeline shortcut, not needed
-- !column employee   -- beeline shortcut, not needed
describe formatted employee;   -- preferred: more readable output

load data local inpath "/home/centos/hive essential/ch03/employee.txt"
overwrite into table employee;

-- Query the whole array and individual elements
select work_place from employee;
select work_place[0] as col_1, work_place[1] as col_2, work_place[2] as col_3
from employee;

-- Query the whole struct and its fields
select gender_age from employee;
select gender_age.gender, gender_age.age from employee;

-- Query the whole map and individual keys
select skills_score from employee;
select name,
       skills_score["DB"] as DB,
       skills_score["Perl"] as Perl,
       skills_score["Python"] as Python,
       skills_score["Sales"] as Sales,
       skills_score["HR"] as HR
from employee;

-- Query a composite type (map of arrays)
select apart_title from employee;
select name,
       apart_title["Product"] as Product,
       apart_title["Test"] as Test,
       apart_title["COE"] as COE,
       apart_title["Sales"] as Sales
from employee;

-- DDL: database operations
create database if not exists myhivebook
comment "ch03 hive database in practice"   -- database description
location "/hdfs/hive"                      -- path on HDFS
with dbproperties ("name"="MengRui", "date"="2018-08-20");

show databases;
describe database myhivebook;   -- print the given database's info
use myhivebook;

drop database if exists myhivebook;           -- drop an empty database
drop database if exists myhivebook cascade;   -- drop a database that still contains tables

alter database myhivebook                     -- set database properties
set dbproperties ("edited by"="dog");
alter database myhivebook set owner user dog;

-- Table operations
create external table external_employee (
    name string,
    work_place array<string>,                   -- access style: array_name[0]
    gender_age struct<gender:string, age:int>,  -- access style: struct_name.col_name
    skills_score map<string, int>,              -- access style: map_name[key]
    apart_title map<string, array<string>>
)
comment "this is an external table"   -- clause position is fixed, otherwise an error is raised
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":"
stored as textfile
location "/user/ch03/employee";   -- the path must contain no sub-directories, or queries fail;
                                  -- if the path does not exist, Hive creates it automatically

load data local inpath "/home/centos/hive essential/ch03/employee.txt"
overwrite into table external_employee;

-- create temporary table temporary_name ...   -- NOTE(review): temporary-table example was left unfinished in the notes

-- CTAS: copies metadata AND data to a new table
create table ctas_employee as
select * from external_employee;

-- CTAS + CTE: pick males named "Michael", plus every female name
create table cte_employee as                                           -- CTAS
with r1 as (select name from r2 where name = "Michael"),               -- CTE
     r2 as (select name from employee where gender_age.gender = "Male"),
     r3 as (select name from employee where gender_age.gender = "Female")
select * from r1
union all
select * from r3;

select * from cte_employee;

-- Create an empty table
-- create table empty_ctas_employee as   -- CTAS would run mappers; slow, not recommended for this
-- select * from employee where 1 = 2;
create table empty_like_employee   -- LIKE replicates metadata only
like employee;                     -- like [table or view]

-- Count rows
select count(*) as row_counts from employee;

-- Dropping an internal table removes the metadata completely and moves the data to Trash.
drop table if exists empty_ctas_employee;

-- Remove all rows from an internal table
truncate table cte_employee;

-- ALTER changes metadata only

-- Rename a table
alter table internal_employee rename to empty_employee;

-- Add or update table properties
alter table employee set
tblproperties("comment" = "this is internal table");

alter table employee set
serdeproperties("field.delim" = "$");

-- Set the table location; Hive does NOT create this path automatically,
-- and it must be an absolute HDFS path
alter table employee set
location "hdfs://mycluster/user/hive/warehouse/new_employee";

alter table external_employee partition(year = 2012, month = 1, day = 1)
enable no_drop;                                -- prevent the partition from being dropped
alter table external_employee enable offline;  -- prevent queries against the data (not the metadata)

-- Merge small files into larger ones; only RCFile and ORC formats are supported right now
alter table employee concatenate;

alter table employee set fileformat rcfile;    -- set the file format
alter table employee set fileformat textfile;

-- Check column types
desc employee;

-- The following change metadata only; the data must match the updated columns
alter table empty_employee
change column name employee_name string   -- rename the column
after work_place;                         -- move the column
alter table empty_employee add columns (wife string);       -- add a new column
alter table empty_employee replace columns (wife string);   -- replace ALL existing columns with this single column

-- Partitioned tables: greatly reduce query time and bandwidth
create table partition_employee (
    name string,
    work_place array<string>,                   -- access style: array_name[0]
    gender_age struct<gender:string, age:int>,  -- access style: struct_name.col_name
    skills_score map<string, int>,              -- access style: map_name[key]
    apart_title map<string, array<string>>
)
partitioned by (year int, month int)
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":";

-- Inspect partitions
show partitions partition_employee;

-- 1) A freshly created table has no partitions; add them manually
alter table partition_employee add
partition (year = 2017, month = 07)
partition (year = 2017, month = 08);

-- 2) Load data into a partition (local: load from the local file system)
load data local inpath "/home/centos/hive essential/ch03/employee.txt"
overwrite into table partition_employee
partition (year = 2017, month = 7);

-- Before querying partitioned data, set:
--   hive.strict.checks.large.query = false
--   hive.mapred.mode = nonstrict

-- Drop a partition
alter table partition_employee drop if exists
partition (year = 2017, month = 7);

-- Bucketed tables
-- 1) Prepare another dataset and table for the bucket table
create table employee_id (
    name string,
    employee_id int,   -- bucket column
    work_place array<string>,
    gender_age struct<gender:string, age:int>,
    skills_score map<string, int>,
    apart_title map<string, array<string>>
)
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":";

load data local inpath "/home/centos/hive essential/ch03/employee_id.txt"
overwrite into table employee_id;

-- 2) Create the bucket table
create table employee_id_buckets (
    name string,
    employee_id int,   -- bucket column
    work_place array<string>,
    gender_age struct<gender:string, age:int>,
    skills_score map<string, int>,
    apart_title map<string, array<string>>
)
clustered by (employee_id) into 2 buckets   -- bucket size: near two blocks of data (256M); bucket count: a power of two (2^N)
row format delimited
fields terminated by "|"
collection items terminated by ","
map keys terminated by ":";

-- 3) Make the reducer count equal to the bucket count
set mapred.reduce.tasks = 2;
set hive.enforce.bucketing = true;

-- 4) Populate data into the buckets (INSERT validates the data against the metadata)
insert overwrite table employee_id_buckets
select * from employee_id;

-- 5) Verify the buckets in HDFS
dfs -ls /user/hive/warehouse/employee_id_buckets;

-- Views: reduce query complexity and improve data security
參考書籍
Programming_Hive
Apache Hive Essentials
數據定義和描述