hive的基本語法操作

阿新 • • 發佈：2018-12-21

基本操作

DDL操作

一. 資料庫

-----1. 建立資料庫：預設儲存在 HDFS 的預設位置：/user/hive/warehouse/資料庫名.db

-- Create the `hive` database; "if not exists" makes this a no-op when it already exists.
create database if not exists hive;

-----2. 檢視資料庫

-- List the databases known to the metastore.
show databases;

–如果資料庫非常多，可以用萬用字元樣式篩選（SHOW DATABASES 的 LIKE 只支援 '*' 與 '|'，並非完整的正則表示式），例如檢視以“h”開頭的資料庫：

-- Here '*' is a wildcard for any run of characters, so 'h*' selects names that start with "h".
show databases like 'h*';

-----3. 檢視資料庫資訊

-- Show the metadata recorded for the `hive` database.
describe database hive;

-----4. 刪除資料庫

-- Drop the database; this plain form only succeeds when the database contains no tables.
drop database if exists hive;
-- CASCADE force-drops a non-empty database together with the tables inside it.
drop database if exists hive cascade;

二. 表

建立表

建立表之前，最好使用use 資料庫名；選擇資料庫，否則表會預設建立在default資料庫中；

–（1）建立內部表

-- Internal (managed) table `student`.
--   * fields terminated by '\t': column delimiter; Hive's default is ^A (Ctrl-A), so this must
--     match the delimiter of the file that will be loaded. The default row delimiter is '\n'.
--   * stored as textfile: storage format.
--   * no LOCATION clause: the table is stored under the default warehouse path.
create table if not exists student(
id int,
name string,
age int)
row format delimited fields terminated by '\t'
stored as textfile;

–（2）建立外部表

-- External table `stu_external2`; LOCATION sets the directory where its data files are stored.
create external table if not exists stu_external2(
id int,
name string,
age int)
row format delimited fields terminated by '\t'
location '/shiny/hive.db/stu_external2';

–（3）建立分割槽表：一級分割槽

-- Single-level partitioned table.
-- PARTITIONED BY names the partition column; that column must NOT also be declared
-- in the regular column list above it.
create table if not exists stu_partition(
id int,
name string,
age int)
partitioned by(sex string)
row format delimited fields terminated by '\t';

– 二級分割槽

-- Two-level partitioned table: partitions are keyed by classname first, then sex.
create table if not exists stu_partition2 (
  id   int,
  name string,
  age  int
)
partitioned by (classname string, sex string)
row format delimited
  fields terminated by '\t';

—插入女分割槽資料------

-- Load the female data file into the sex='female' partition of the single-level table.
load data local inpath '/home/shiny/Desktop/data/female.txt'
into table stu_partition
partition (sex='female');

-- Load the same file into the (classname='1101', sex='female') partition of the two-level table.
load data local inpath '/home/shiny/Desktop/data/female.txt'
into table stu_partition2
partition (classname='1101', sex='female');

—插入男分割槽資料------

-- Load the male data file into the sex='male' partition of the single-level table.
load data local inpath '/home/shiny/Desktop/data/male.txt'
into table stu_partition
partition (sex='male');

-- Load the same file into the (classname='1101', sex='male') partition of the two-level table.
load data local inpath '/home/shiny/Desktop/data/male.txt'
into table stu_partition2
partition (classname='1101', sex='male');

----查詢表的所有資料------

-- Read every row of stu_partition, across all of its partitions.
select * from stu_partition;

----查詢表分割槽-----------

-- List the partitions that currently exist for stu_partition.
show partitions stu_partition;

–（4）建立分桶表

-- Bucketed table: CLUSTERED BY names the bucketing column, SORTED BY the sort column
-- (id, descending), and INTO 3 BUCKETS fixes the number of buckets.
create table if not exists stu_buck(
id int,
name string,
age int)
clustered by(id) sorted by(id desc) into 3 buckets
row format delimited fields terminated by '\t';

–插入資料

-- Fill the bucketed table from `student`: rows are routed by id and each reducer's
-- output is sorted by id descending.
insert into table stu_buck
select *
from student
distribute by (id)
sort by (id desc);

------2. 修改表 –（1）重新命名表

-- Rename table `student` to `stu_internal`; later examples refer to the new name.
alter table student rename to stu_internal;

–（2）增加列

-- Append a new string column `address` to stu_partition.
alter table stu_partition add columns (address string);

—查看表結構-----

-- Show the column layout of stu_partition.
desc stu_partition;

—查看表結構詳細資訊

desc formatted stu_partition;

–（3）改變列

-- Rename column `id` to `number` and set its type to string.
alter table stu_partition
  change id number string;

–（4）替換/刪除列

-- REPLACE COLUMNS swaps in a whole new column list; any existing column not listed here is dropped.
alter table stu_partition
  replace columns (id int, name string, age int);

–（5）新增分割槽------

-- Add a partition sex='weizhi'. String literals need plain ASCII quotes;
-- typographic quotes (‘ ’) are not accepted by the parser.
alter table stu_partition add partition(sex='weizhi');

–（6）刪除分割槽------

-- Remove the sex='weizhi' partition again.
alter table stu_partition drop partition(sex='weizhi');

------3. 刪除表

-- Kept on its own line: a statement that follows a leading "--" on the same line is treated as a comment.
-- Dropping an external table removes only its metadata; the files under its LOCATION stay in place.
drop table if exists stu_external2;

–顯示當前資料庫中所有的表

-- List the tables of the current database.
show tables;

---------1. Load裝載資料-------------------

–（1）載入本地資料（複製資料）

-- LOCAL reads from the client file system and copies the file into the table.
-- Paths must use plain ASCII quotes.
load data local inpath '/home/shiny/Desktop/data/female.txt' into table stu_internal;

–（2）載入HDFS資料（移動資料）

-- Without LOCAL the path is an HDFS path and the file is moved (not copied) into the table.
load data inpath '/data/male.txt' into table stu_internal;

–（3）載入本地資料覆蓋表中內容

-- OVERWRITE replaces the table's existing contents instead of appending to them.
load data local inpath '/home/shiny/Desktop/data/female.txt' overwrite into table stu_internal;

---------2. INSERT插入資料-----------------

–（1）單條插入(一般不使用)

-- Single-row insert; values are positional (id, name, age). The string needs plain ASCII quotes.
insert into table stu_internal values(1116,'bob',23);

–（2）利用查詢語句將結果匯入新表（新表必須事先手動建立）

– 複製表（只是複製現有的表結構，不複製資料）

create table student like stu_internal;

– 將資料匯入新表

-- "overwrite/into" is not a keyword; pick exactly one of the two forms:
--   insert overwrite table ...  replaces the target table's current contents
--   insert into table ...       appends to the target table's current contents
-- The overwrite form is used here so that re-running the example does not duplicate rows.
insert overwrite table student
select * from stu_internal where age>=23;

–（3）多重插入（新表事先建立）

-- Target table for the multi-insert example; it only holds id and name.
create table stu_insert(
id int,
name string)
row format delimited fields terminated by '\t';

-- Multi-insert: stu_internal is named once in FROM and feeds two inserts.
--   * rows with age < 23 go to `student` (all columns)
--   * id and name of every row go to `stu_insert`
from stu_internal
insert into table student select * where age<23
insert into table stu_insert select id,name;

–（4）CTAS（create table … as select …）（新表不用事先手動建立）如果select語句查詢由於某種原因而失敗，新表是不會建立的。

-- CTAS: create stu_ctas from the query result (id and age of students younger than 23).
create table stu_ctas as
select
  id,
  age
from stu_internal
where age < 23;

-----3. INSERT匯出資料（注意是overwrite，不能使用into）----------------- –（1）單模式匯出：匯出到本地（^A(ctrl+A)為列分隔符，\n為行分隔符）

-- Export all rows of `student` into a directory on the local file system.
insert overwrite local directory '/home/shiny/Desktop/data/student'
select *
from student;

–（2）單模式匯出：匯出到HDFS（^A(ctrl+A)為列分隔符，\n為行分隔符）

-- Export all rows of `student` into the HDFS directory /student (no LOCAL keyword).
insert overwrite directory '/student'
select *
from student;

----4. SELECT查詢資料--------------------- –建立表

-- Score table used by the SELECT examples: student id and name, course name, and score.
create table if not exists score (
  id     int,
  name   string,
  course string,
  score  int
)
row format delimited
  fields terminated by '\t';

-- Load the local score file into the `score` table.
load data local inpath '/home/shiny/Desktop/data/score.txt' into table score;

–（1）GROUP BY:查詢每位學生總成績 – 注意：在Group by子句中，Select查詢的列，要麼需要是Group by中的列，要麼得是用聚合函式（比如sum、count等）加工過的列。不支援直接引用非Group by的列。

-- Total score per student; the sum is exposed under the alias `count`.
select
  id,
  name,
  sum(score) as count
from score
group by
  id,
  name;

–（2）ORDER BY:獲取全級總成績最高的學生資訊（全域性排序）預設是升序排序asc

-- Student with the highest total score: global ordering by the summed score, top row only.
select
  id,
  name,
  sum(score) as count
from score
group by
  id,
  name
order by count desc
limit 1;

–（3）SORT BY：查詢學生資訊，按照id降序排序（區域性排序） –設定reduce的個數為2

-- Run with two reducers.
set mapreduce.job.reduces=2;
-- SORT BY orders rows by id descending within each reducer (a local sort, not a global one).
create table stu_sort as
select *
from student
sort by id desc;

–（4）先對age進行降序排序，age相同的情況下對id進行降序排序 –DISTRIBUTE BY + SORT BY：分桶和排序的組合操作，對id進行分桶，對age，id進行降序排序

-- Turn on bucketing enforcement.
-- NOTE(review): this setting concerns inserts into bucketed tables; it may have no effect
-- on the directory export below — confirm.
SET hive.enforce.bucketing = true;
-- Number of reduce tasks, i.e. the number of buckets the rows are spread over.
SET mapreduce.job.reduces=3;
-- Distribute rows by id, then sort each reducer's rows by age desc and id desc.
insert overwrite local directory '/home/shiny/Desktop/data/distr'
select *
from student
distribute by (id)
sort by (age desc, id desc);

–（5）對id進行分桶，對id進行升序排序 – CLUSTER BY：分桶和排序的組合操作，等於DISTRIBUTE BY + SORT BY（前提：分桶和SORT欄位是同一個）。

-- CLUSTER BY (id) is shorthand for "distribute by id sort by id" (ascending sort).
insert overwrite local directory '/home/shiny/Desktop/data/cluster'
select *
from student
cluster by (id);

join連線

-- Inner join: only rows whose id matches in both tables.
select * from studenta a join studentb b on a.id=b.id;
-- Left outer join: every row of the left table; unmatched right-side columns are NULL.
select * from studenta a left join studentb b on a.id=b.id;
-- Right outer join: every row of the right table; unmatched left-side columns are NULL.
select * from studenta a right join studentb b on a.id=b.id;
-- Full outer join: rows from both tables; whichever side has no match is filled with NULL.
-- Uses studenta, the same left table as the other join examples.
select * from studenta a full join studentb b on a.id=b.id;
-- Left semi join: returns only left-table rows that have a match in the right table.
select * from studenta a left semi join studentb b on a.id=b.id;

hive的基本語法操作

基本操作

join連線

hive的基本語法操作

HBase- ddl（表操作）、dml（記錄操作）的基本語法操作

Hive基本shell操作

hive 基本命令操作

HIVE基本語法使用

008-Hadoop Hive sql語法詳解3-DML 操作:元數據存儲

hive基本操作與應用

Hive基本操作與案例

Hive基本操作

NSISI基本語法---登錄檔的操作

004-hive基本操作

Hive基本操作，DDL操作(建立表，修改表，顯示命令)，DML操作(Load Insert Select),Hive Join,Hive Shell引數(內建運算子、內建函式)等

Hive基本操作——DML操作

MySQL操作基本語法

hive基本操作指令

JS基本語法（二）——DOM操作

oracle最基本的操作語法

HIVE SQL 基本語法

Oracle的基本語法和操作

Matlab基本語法和基本操作

hive的基本語法操作

基本操作

join連線

相關推薦