hive資料庫基礎學習
阿新 • • 發佈:2019-02-13
# Print the path of the hive executable found on PATH.
which hive
# Change to /usr/local/hive/ (presumably the Hive install directory) and list it.
cd /usr/local/hive/
ls
# Enter the conf subdirectory and list its files.
cd conf
ls
# Open hive-site.xml in vi.
vi hive-site.xml
# Query the contents kept in MySQL: connect to host bigdata003 and open the
# `hive` database (presumably the Hive metastore -- confirm in hive-site.xml).
ssh bigdata003
# NOTE(review): the password is given inline after -p, so it ends up in the
# shell history; consider a bare -p to be prompted instead.
mysql -u hive -phive2017 -D hive
show tables;
# \G terminates the statement by itself and prints rows vertically. The
# original trailing ';' submitted an extra empty statement, which the mysql
# client rejects with "No query specified".
select * from DBS \G
# Leave the mysql client, then connect to host bigdata001.
exit
ssh bigdata001
-- Create a personal database and make it the current one.
-- Fixed: the keyword is DATABASE; "create databases" is a parse error.
-- IF NOT EXISTS keeps the statement safe to re-run.
CREATE DATABASE IF NOT EXISTS luolin;
USE luolin;
-- Create the citys table: province and city codes/names, read from
-- comma-delimited, newline-terminated rows.
-- IF NOT EXISTS keeps the statement safe to re-run.
CREATE TABLE IF NOT EXISTS citys (
    province_code INT,
    province_name STRING,
    city_code     INT,
    city_name     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

-- Load the data file to be processed from the client's local filesystem.
-- INTO without OVERWRITE appends to whatever the table already holds.
LOAD DATA LOCAL INPATH '/home/bigdata/hive/city.txt' INTO TABLE citys;
-- SQL basics review.

-- The 10 provinces with the most rows having a non-NULL city_name.
SELECT
    province_name,
    COUNT(city_name) AS total
FROM citys
GROUP BY province_name
ORDER BY total DESC
LIMIT 10;

-- Each province name once.
SELECT DISTINCT province_name
FROM citys;

-- Number of provinces, counted by grouping in a subquery.
-- A NULL province_name forms its own group here and is counted.
SELECT COUNT(*)
FROM (
    SELECT province_name
    FROM citys
    GROUP BY province_name
) a;

-- Number of provinces via COUNT(DISTINCT); NULL province_name is not counted.
SELECT COUNT(DISTINCT province_name)
FROM citys;
-- Create city_ex with the same four columns as citys, backed by an explicit
-- storage directory instead of a LOAD DATA step.
-- NOTE(review): without the EXTERNAL keyword this is a managed table, so
-- DROP TABLE would also remove the files under LOCATION; confirm whether
-- CREATE EXTERNAL TABLE was intended.
CREATE TABLE IF NOT EXISTS city_ex (
    province_code INT,
    province_name STRING,
    city_code     INT,
    city_name     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
LOCATION '/user/luo/city/';

-- Check the rows for Sichuan province.
SELECT
    province_code,
    province_name,
    city_code,
    city_name
FROM city_ex
WHERE province_name = '四川';
-- Create the user table.
-- USER is a reserved keyword in Hive 1.2.0 and later, so the identifier is
-- back-quoted; the table name itself is unchanged.
CREATE TABLE IF NOT EXISTS `user` (
    uid       INT,
    city_code INT,
    model     STRING,
    access    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

-- Load the user data file. LOCAL means the path is read from the client's
-- local filesystem; INTO without OVERWRITE appends to existing data.
LOAD DATA LOCAL INPATH '/home/bigdata/tanqi/hive/user.txt' INTO TABLE `user`;
-- Create a partitioned table: same columns as user, partitioned by p_date.
CREATE TABLE IF NOT EXISTS user_daily (
    uid       INT,
    city_code INT,
    model     STRING,
    access    STRING
)
PARTITIONED BY (p_date STRING);

-- Static partition insert: replace partition p_date='2017-09-01' with every
-- row of user. Columns are listed explicitly so the insert does not depend on
-- the column order of the source table.
INSERT OVERWRITE TABLE user_daily PARTITION (p_date = '2017-09-01')
SELECT
    uid,
    city_code,
    model,
    access
FROM `user`;

-- List the partitions of user_daily.
SHOW PARTITIONS user_daily;
-- Dynamic partition insert: the target partition is taken from the last
-- SELECT column instead of being fixed in the PARTITION clause.
-- nonstrict mode allows all partition columns to be dynamic.
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE user_daily PARTITION (p_date)
SELECT uid, city_code, model, access, '2017-09-02' AS p_date
FROM `user`
UNION ALL
SELECT uid, city_code, model, access, '2017-09-03' AS p_date
FROM `user`;
-- Rename a partition: p_date '2017-09-01' becomes '20170901'.
ALTER TABLE user_daily PARTITION (p_date = '2017-09-01')
    RENAME TO PARTITION (p_date = '20170901');

-- Drop a partition; IF EXISTS makes the statement safe to re-run.
ALTER TABLE user_daily DROP IF EXISTS PARTITION (p_date = '2017-09-02');
-- Exercise answers.

-- All user rows whose access type is WIFI.
SELECT
    uid,
    city_code,
    model,
    access
FROM `user`
WHERE access = 'WIFI';

-- The 5 city codes with the most WIFI rows.
SELECT
    city_code,
    COUNT(access) AS total
FROM `user`
WHERE access = 'WIFI'
GROUP BY city_code
ORDER BY total DESC
LIMIT 5;

-- (access, city_code) combinations that occur at least 3 times.
-- HAVING repeats the aggregate instead of relying on the SELECT alias.
SELECT
    access,
    city_code,
    COUNT(*) AS total
FROM `user`
GROUP BY access, city_code
HAVING COUNT(*) >= 3
ORDER BY total DESC;

-- Row count per access type for models containing "ne" (case-insensitive).
SELECT
    access,
    COUNT(*) AS total
FROM `user`
WHERE LOWER(model) LIKE '%ne%'
GROUP BY access
ORDER BY total DESC;
-- Examples of IF, CASE WHEN and COLLECT_SET.

-- Share of rows whose access type is WIFI, as a fraction of all rows.
SELECT SUM(IF(access = 'WIFI', 1, 0)) / COUNT(*)
FROM `user`;

-- Row count per bucket of uid % 10: 0-3, 4-7, everything else as '8-9'.
-- INTERVAL is a reserved keyword in Hive 1.2.0 and later, so the alias is
-- back-quoted; the output column name is unchanged.
SELECT
    CASE
        WHEN uid % 10 IN (0, 1, 2, 3) THEN '0-3'
        WHEN uid % 10 IN (4, 5, 6, 7) THEN '4-7'
        ELSE '8-9'
    END AS `interval`,
    COUNT(*) AS cnt
FROM `user`
GROUP BY
    CASE
        WHEN uid % 10 IN (0, 1, 2, 3) THEN '0-3'
        WHEN uid % 10 IN (4, 5, 6, 7) THEN '4-7'
        ELSE '8-9'
    END;

-- De-duplicate: collect the distinct access values into one array.
SELECT COLLECT_SET(access)
FROM `user`;
-- For the 4 city codes with the most rows in user, report how the rows split
-- across access types, stored as a map from access type to row count.
SELECT
    city_code,
    COUNT(*) AS cnt,
    MAP(
        'WIFI', SUM(IF(access = 'WIFI', 1, 0)),
        '2G',   SUM(IF(access = '2G', 1, 0)),
        '3G',   SUM(IF(access = '3G', 1, 0)),
        '4G',   SUM(IF(access = '4G', 1, 0))
    ) AS map_reten_acc
FROM `user`
GROUP BY city_code
ORDER BY cnt DESC
LIMIT 4;
-- Join examples. Each query joins users with uid <= 100 to cities with
-- province_code <= 30 on city_code and returns at most 20 rows. There is no
-- ORDER BY, so which 20 rows come back is not fixed.
-- The subqueries select only the columns the outer query uses. The `user`
-- alias is back-quoted because USER is reserved in Hive 1.2.0 and later.

-- LEFT JOIN: keeps every filtered user row; city_name is NULL without a match.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
LEFT JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;

-- RIGHT JOIN: keeps every filtered city row; the user columns are NULL
-- without a match.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
RIGHT JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;

-- INNER JOIN: keeps only rows that match on both sides.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
INNER JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;

-- FULL JOIN: keeps unmatched rows from both sides.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
FULL JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;
-- Window functions.

-- Top-N per group (N = 1): for each access value keep the row ranked first
-- when ordered by city_code descending. Rows tied on city_code are ranked in
-- an unspecified order, so the uid returned for a tie is not fixed.
SELECT
    access,
    city_code,
    uid
FROM (
    SELECT
        uid,
        access,
        city_code,
        ROW_NUMBER() OVER (PARTITION BY access ORDER BY city_code DESC) AS row_num
    FROM `user`
) a
WHERE row_num = 1;
-- Running total: per-partition row counts of user_daily accumulated in
-- p_date order.
-- p_date is a STRING, so BETWEEN compares text; a partition named '20170901'
-- sorts after '2017-09-30' and falls outside this range.
SELECT
    p_date,
    SUM(cnt) OVER (
        ORDER BY p_date ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )
FROM (
    SELECT
        p_date,
        COUNT(*) AS cnt
    FROM user_daily
    WHERE p_date BETWEEN '2017-09-01' AND '2017-09-30'
    GROUP BY p_date
) a;
# Change to /usr/local/hive/ (presumably the Hive install directory) and list it.
cd /usr/local/hive/
ls
# Enter the conf subdirectory and list its files.
cd conf
ls
# Open hive-site.xml in vi.
vi hive-site.xml
# Query the contents kept in MySQL: connect to host bigdata003 and open the
# `hive` database (presumably the Hive metastore -- confirm in hive-site.xml).
ssh bigdata003
# NOTE(review): the password is given inline after -p, so it ends up in the
# shell history; consider a bare -p to be prompted instead.
mysql -u hive -phive2017 -D hive
show tables;
# \G terminates the statement by itself and prints rows vertically. The
# original trailing ';' submitted an extra empty statement, which the mysql
# client rejects with "No query specified".
select * from DBS \G
# Leave the mysql client, then connect to host bigdata001.
exit
ssh bigdata001
-- Create a personal database and make it the current one.
-- Fixed: the keyword is DATABASE; "create databases" is a parse error.
-- IF NOT EXISTS keeps the statement safe to re-run.
CREATE DATABASE IF NOT EXISTS luolin;
USE luolin;
-- Create the citys table: province and city codes/names, read from
-- comma-delimited, newline-terminated rows.
-- IF NOT EXISTS keeps the statement safe to re-run.
CREATE TABLE IF NOT EXISTS citys (
    province_code INT,
    province_name STRING,
    city_code     INT,
    city_name     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

-- Load the data file to be processed from the client's local filesystem.
-- INTO without OVERWRITE appends to whatever the table already holds.
LOAD DATA LOCAL INPATH '/home/bigdata/hive/city.txt' INTO TABLE citys;
-- SQL basics review.

-- The 10 provinces with the most rows having a non-NULL city_name.
SELECT
    province_name,
    COUNT(city_name) AS total
FROM citys
GROUP BY province_name
ORDER BY total DESC
LIMIT 10;

-- Each province name once.
SELECT DISTINCT province_name
FROM citys;

-- Number of provinces, counted by grouping in a subquery.
-- A NULL province_name forms its own group here and is counted.
SELECT COUNT(*)
FROM (
    SELECT province_name
    FROM citys
    GROUP BY province_name
) a;

-- Number of provinces via COUNT(DISTINCT); NULL province_name is not counted.
SELECT COUNT(DISTINCT province_name)
FROM citys;
-- Create city_ex with the same four columns as citys, backed by an explicit
-- storage directory instead of a LOAD DATA step.
-- NOTE(review): without the EXTERNAL keyword this is a managed table, so
-- DROP TABLE would also remove the files under LOCATION; confirm whether
-- CREATE EXTERNAL TABLE was intended.
CREATE TABLE IF NOT EXISTS city_ex (
    province_code INT,
    province_name STRING,
    city_code     INT,
    city_name     STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n'
LOCATION '/user/luo/city/';

-- Check the rows for Sichuan province.
SELECT
    province_code,
    province_name,
    city_code,
    city_name
FROM city_ex
WHERE province_name = '四川';
-- Create the user table.
-- USER is a reserved keyword in Hive 1.2.0 and later, so the identifier is
-- back-quoted; the table name itself is unchanged.
CREATE TABLE IF NOT EXISTS `user` (
    uid       INT,
    city_code INT,
    model     STRING,
    access    STRING
)
ROW FORMAT DELIMITED
    FIELDS TERMINATED BY ','
    LINES TERMINATED BY '\n';

-- Load the user data file. LOCAL means the path is read from the client's
-- local filesystem; INTO without OVERWRITE appends to existing data.
LOAD DATA LOCAL INPATH '/home/bigdata/tanqi/hive/user.txt' INTO TABLE `user`;
-- Create a partitioned table: same columns as user, partitioned by p_date.
CREATE TABLE IF NOT EXISTS user_daily (
    uid       INT,
    city_code INT,
    model     STRING,
    access    STRING
)
PARTITIONED BY (p_date STRING);

-- Static partition insert: replace partition p_date='2017-09-01' with every
-- row of user. Columns are listed explicitly so the insert does not depend on
-- the column order of the source table.
INSERT OVERWRITE TABLE user_daily PARTITION (p_date = '2017-09-01')
SELECT
    uid,
    city_code,
    model,
    access
FROM `user`;

-- List the partitions of user_daily.
SHOW PARTITIONS user_daily;
-- Dynamic partition insert: the target partition is taken from the last
-- SELECT column instead of being fixed in the PARTITION clause.
-- nonstrict mode allows all partition columns to be dynamic.
SET hive.exec.dynamic.partition.mode=nonstrict;
INSERT OVERWRITE TABLE user_daily PARTITION (p_date)
SELECT uid, city_code, model, access, '2017-09-02' AS p_date
FROM `user`
UNION ALL
SELECT uid, city_code, model, access, '2017-09-03' AS p_date
FROM `user`;
-- Rename a partition: p_date '2017-09-01' becomes '20170901'.
ALTER TABLE user_daily PARTITION (p_date = '2017-09-01')
    RENAME TO PARTITION (p_date = '20170901');

-- Drop a partition; IF EXISTS makes the statement safe to re-run.
ALTER TABLE user_daily DROP IF EXISTS PARTITION (p_date = '2017-09-02');
-- Exercise answers.

-- All user rows whose access type is WIFI.
SELECT
    uid,
    city_code,
    model,
    access
FROM `user`
WHERE access = 'WIFI';

-- The 5 city codes with the most WIFI rows.
SELECT
    city_code,
    COUNT(access) AS total
FROM `user`
WHERE access = 'WIFI'
GROUP BY city_code
ORDER BY total DESC
LIMIT 5;

-- (access, city_code) combinations that occur at least 3 times.
-- HAVING repeats the aggregate instead of relying on the SELECT alias.
SELECT
    access,
    city_code,
    COUNT(*) AS total
FROM `user`
GROUP BY access, city_code
HAVING COUNT(*) >= 3
ORDER BY total DESC;

-- Row count per access type for models containing "ne" (case-insensitive).
SELECT
    access,
    COUNT(*) AS total
FROM `user`
WHERE LOWER(model) LIKE '%ne%'
GROUP BY access
ORDER BY total DESC;
-- Examples of IF, CASE WHEN and COLLECT_SET.

-- Share of rows whose access type is WIFI, as a fraction of all rows.
SELECT SUM(IF(access = 'WIFI', 1, 0)) / COUNT(*)
FROM `user`;

-- Row count per bucket of uid % 10: 0-3, 4-7, everything else as '8-9'.
-- INTERVAL is a reserved keyword in Hive 1.2.0 and later, so the alias is
-- back-quoted; the output column name is unchanged.
SELECT
    CASE
        WHEN uid % 10 IN (0, 1, 2, 3) THEN '0-3'
        WHEN uid % 10 IN (4, 5, 6, 7) THEN '4-7'
        ELSE '8-9'
    END AS `interval`,
    COUNT(*) AS cnt
FROM `user`
GROUP BY
    CASE
        WHEN uid % 10 IN (0, 1, 2, 3) THEN '0-3'
        WHEN uid % 10 IN (4, 5, 6, 7) THEN '4-7'
        ELSE '8-9'
    END;

-- De-duplicate: collect the distinct access values into one array.
SELECT COLLECT_SET(access)
FROM `user`;
-- For the 4 city codes with the most rows in user, report how the rows split
-- across access types, stored as a map from access type to row count.
SELECT
    city_code,
    COUNT(*) AS cnt,
    MAP(
        'WIFI', SUM(IF(access = 'WIFI', 1, 0)),
        '2G',   SUM(IF(access = '2G', 1, 0)),
        '3G',   SUM(IF(access = '3G', 1, 0)),
        '4G',   SUM(IF(access = '4G', 1, 0))
    ) AS map_reten_acc
FROM `user`
GROUP BY city_code
ORDER BY cnt DESC
LIMIT 4;
-- Join examples. Each query joins users with uid <= 100 to cities with
-- province_code <= 30 on city_code and returns at most 20 rows. There is no
-- ORDER BY, so which 20 rows come back is not fixed.
-- The subqueries select only the columns the outer query uses. The `user`
-- alias is back-quoted because USER is reserved in Hive 1.2.0 and later.

-- LEFT JOIN: keeps every filtered user row; city_name is NULL without a match.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
LEFT JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;

-- RIGHT JOIN: keeps every filtered city row; the user columns are NULL
-- without a match.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
RIGHT JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;

-- INNER JOIN: keeps only rows that match on both sides.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
INNER JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;

-- FULL JOIN: keeps unmatched rows from both sides.
SELECT `user`.uid, `user`.city_code, citys.city_name
FROM (
    SELECT uid, city_code
    FROM `user`
    WHERE uid <= 100
) `user`
FULL JOIN (
    SELECT city_code, city_name
    FROM citys
    WHERE province_code <= 30
) citys
    ON (`user`.city_code = citys.city_code)
LIMIT 20;
-- Window functions.

-- Top-N per group (N = 1): for each access value keep the row ranked first
-- when ordered by city_code descending. Rows tied on city_code are ranked in
-- an unspecified order, so the uid returned for a tie is not fixed.
SELECT
    access,
    city_code,
    uid
FROM (
    SELECT
        uid,
        access,
        city_code,
        ROW_NUMBER() OVER (PARTITION BY access ORDER BY city_code DESC) AS row_num
    FROM `user`
) a
WHERE row_num = 1;
-- Running total: per-partition row counts of user_daily accumulated in
-- p_date order.
-- p_date is a STRING, so BETWEEN compares text; a partition named '20170901'
-- sorts after '2017-09-30' and falls outside this range.
SELECT
    p_date,
    SUM(cnt) OVER (
        ORDER BY p_date ASC
        ROWS BETWEEN UNBOUNDED PRECEDING AND CURRENT ROW
    )
FROM (
    SELECT
        p_date,
        COUNT(*) AS cnt
    FROM user_daily
    WHERE p_date BETWEEN '2017-09-01' AND '2017-09-30'
    GROUP BY p_date
) a;