Hive HQL(HiveQL)基本操作命令
建立表:
hive>create table tablename(id int,name string,password string);
建立一個名字為tablename的表,表的屬性有int id; string name; string password;
建立表時指定分隔符
hive> create table test1(name string,count int) row format delimited fields terminated by '\t';
載入表
hive> load data inpath '/user/hadoop/output7/part-r-00000' into table test1;
建立一個新表,結構與其他一樣
hive> create table table1 like table2;
建立一個表table1,表結構跟table2一樣;
建立分割槽表
hive> create table table1(id int,line string) partitioned by (dt string,country string);
顯示表裡有多少條記錄(count 數大於50的有多少條記錄)
hive>select count(*) from test1 where count>50;
排序用法order by (查詢count 數大於50並排序)
select * from test2 where count > 50 order by count;
顯示表中有多少分割槽
hive> show partitions table1;
顯示所有表
hive> show tables;
顯示所有以u開頭的表
hive> show tables 'u*';
顯示表的結構資訊
hive> describe test1;
修改表名字
hive> alter table table1 rename to test3;
在原表上新增一列
hive> alter table test1 add columns(new_col2 int comment 'a comment');
hive> alter table test1 add columns(new_col3 int);
刪除表
hive> drop table test3;
從本地檔案載入資料:
hive> LOAD DATA LOCAL INPATH '/home/hadoop/input/ncdc/micro-tab/sample.txt' OVERWRITE INTO TABLE records;
載入分割槽表
hive> load data inpath '/user/hive/warehouse/clickstream_log/dt=2016-11-29/part-r-00000' overwrite into table clickstream_log PARTITION(dt = '2016-11-30');
顯示所有函式
hive> show functions;
檢視函式的用法
hive> describe function substr;
檢視陣列、map、結構
hive> select col1[0],col2['b'],col3.c from complex;
檢視陣列、map、結構
hive> select col1[0],col2['b'],col3.c from complex;
內連線:
hive> SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
檢視hive為某個查詢使用多少個MapReduce作業
hive> Explain SELECT sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
外連線:
hive> SELECT sales.*, things.* FROM sales LEFT OUTER JOIN things ON (sales.id = things.id);
hive> SELECT sales.*, things.* FROM sales RIGHT OUTER JOIN things ON (sales.id = things.id);
hive> SELECT sales.*, things.* FROM sales FULL OUTER JOIN things ON (sales.id = things.id);
IN查詢:早期版本的Hive不支援IN子查詢(自0.13版起WHERE子句已支援IN/EXISTS子查詢),但可以使用LEFT SEMI JOIN代替
hive> SELECT * FROM things LEFT SEMI JOIN sales ON (sales.id = things.id);
Map連線:Hive可以把較小的表放入每個Mapper的記憶體來執行連線操作
hive> SELECT /*+ MAPJOIN(things) */ sales.*, things.* FROM sales JOIN things ON (sales.id = things.id);
INSERT OVERWRITE TABLE … SELECT:新表預先存在
hive> FROM records2
> INSERT OVERWRITE TABLE stations_by_year SELECT year, COUNT(DISTINCT station) GROUP BY year
> INSERT OVERWRITE TABLE records_by_year SELECT year, COUNT(1) GROUP BY year
> INSERT OVERWRITE TABLE good_records_by_year SELECT year, COUNT(1) WHERE temperature != 9999 AND (quality = 0 OR quality = 1 OR quality = 4 OR quality = 5 OR quality = 9) GROUP BY year;
CREATE TABLE … AS SELECT:新表預先不存在
hive>CREATE TABLE target AS SELECT col1,col2 FROM source;
建立檢視:
hive> CREATE VIEW valid_records AS SELECT * FROM records2 WHERE temperature !=9999;
查看檢視(view)的詳細資訊:
hive> DESCRIBE EXTENDED valid_records;
傳統資料庫:
新增:
insert into 表名 values();
修改:
update 表名 set a=b where b=c;
刪除:
delete from 表名 where a=b;
查詢:
select * from 表名 where a=b;