HIVE基本語法使用
阿新 • • 發佈:2019-02-06
set hive.cli.print.header=true;
建立一張表
CREATE TABLE page_view(viewTime INT, userid BIGINT,
page_url STRING, referrer_url STRING,
ip STRING COMMENT 'IP Address of the User')
#新增描述
COMMENT 'This is the page view table'
#新增分割槽
PARTITIONED BY(dt STRING, country STRING)
#制定檔案的分隔符
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t'
#
STORED AS SEQUENCEFILE;
TEXTFILE
以小檔案形式儲存
//sequencefile
create table tab_ip_seq(id int,name string,ip string,country string)
row format delimited
fields terminated by ','
stored as sequencefile;
insert overwrite table tab_ip_seq select * from tab_ext;
load資料,載入到hive的目錄中去
//create & load
create table tab_ip(id int,name string,ip string,country string)
row format delimited
fields terminated by ','
stored as textfile;
load data local inpath '/home/hadoop/ip.txt' into table tab_ext;
載入資料,以連結的方式來建立
//external
CREATE EXTERNAL TABLE tab_ip_ext(id int, name string,
ip STRING,
country STRING)
ROW FORMAT DELIMITED FIELDS TERMINATED BY ','
STORED AS TEXTFILE
LOCATION '/external/hive';
// CTAS 用於建立一些臨時表儲存中間結果
CREATE TABLE tab_ip_ctas
AS
SELECT id new_id, name new_name, ip new_ip,country new_country
FROM tab_ip_ext
#sort by 是幹嘛的,跟Oder by 有什麼關係
SORT BY new_id;
//insert from select 用於向臨時表中追加中間結果資料
create table tab_ip_like like tab_ip;
insert overwrite table tab_ip_like
select * from tab_ip;
//CLUSTER <–相對高階一點,你可以放在有精力的時候才去學習>
create table tab_ip_cluster(id int,name string,ip string,country string)
clustered by(id) into 3 buckets;
分桶
load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_cluster;
set hive.enforce.bucketing=true;
insert into table tab_ip_cluster select * from tab_ip;
select * from tab_ip_cluster tablesample(bucket 2 out of 3 on id);
//PARTITION分割槽
create table tab_ip_part(id int,name string,ip string,country string)
partitioned by (part_flag string)
row format delimited fields terminated by ',';
load data local inpath '/home/hadoop/ip.txt' overwrite into table tab_ip_part
partition(part_flag='part1');
load data local inpath '/home/hadoop/ip_part2.txt' overwrite into table tab_ip_part
partition(part_flag='part2');
select * from tab_ip_part;
select * from tab_ip_part where part_flag='part2';
select count(*) from tab_ip_part where part_flag='part2';
alter table tab_ip change id id_alter string;
ALTER TABLE tab_cts ADD PARTITION (partCol = 'dt') location '/external/hive/dt';
show partitions tab_ip_part;
//write to hdfs把資料寫到hdfs中
insert overwrite local directory '/home/hadoop/hivetemp/test.txt' select * from tab_ip_part where part_flag='part1';
insert overwrite directory '/hiveout.txt' select * from tab_ip_part where part_flag='part1';
//array 陣列的使用
create table tab_array(a array<int>,b array<string>)
row format delimited
fields terminated by '\t'
collection items terminated by ',';
示例資料
tobenbrone,laihama,woshishui 13866987898,13287654321
abc,iloveyou,itcast 13866987898,13287654321
select a[0] from tab_array;
select * from tab_array where array_contains(b,'word');
insert into table tab_array select array(0),array(name,ip) from tab_ext t;
//map的使用
create table tab_map(name string,info map<string,string>)
row format delimited
fields terminated by '\t'
collection items terminated by ';'
map keys terminated by ':';
示例資料:
fengjie age:18;size:36A;addr:usa
furong age:28;size:39C;addr:beijing;weight:180KG
load data local inpath '/home/hadoop/hivetemp/tab_map.txt' overwrite into table tab_map;
insert into table tab_map select name,map('name',name,'ip',ip) from tab_ext;
//struct結構體
create table tab_struct(name string,info struct<age:int,tel:string,addr:string>)
row format delimited
fields terminated by '\t'
collection items terminated by ','
load data local inpath '/home/hadoop/hivetemp/tab_st.txt' overwrite into table tab_struct;
insert into table tab_struct select name,named_struct('age',id,'tel',name,'addr',country) from tab_ext;
怎麼在shell語句中執行,可以用於批處理
hive -S -e 'select country,count(*) from tab_ext' > /home/hadoop/hivetemp/e.txt
有了這種執行機制,就使得我們可以利用指令碼語言(bash shell,python)進行hql語句的批量執行
select * from tab_ext sort by id desc limit 5;
select a.ip,b.book from tab_ext a join tab_ip_book b on(a.name=b.name);
//UDF自定義函式的使用
select if(id=1,first,no-first),name from tab_ext;
hive>add jar /home/hadoop/myudf.jar;
hive>CREATE TEMPORARY FUNCTION my_lower AS 'org.dht.Lower';
select my_upper(name) from tab_ext;