1. 程式人生 > >hive資料庫基礎學習

hive資料庫基礎學習

which hive
cd /usr/local/hive/
ls
cd conf
ls
vi hive-site.xml
查詢mysql中的內容
ssh bigdata003
mysql -u hive -phive2017 -D hive
show tables;
 select * from DBS \G;
exit
ssh bigdata001
//建立自己的資料庫
create databases luolin;
use luolin;
//建立一個citys表
CREATE TABLE citys(
  province_code INT,
  province_name string,
  city_code INT,
  city_name string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//載入需要處理的資料檔案
load data local inpath '/home/bigdata/hive/city.txt' into table citys;
//sql基礎回顧
select province_name,count(city_name) as total from citys group by province_name order by total desc limit 10;

select distinct province_name from citys;

select count(*) from (  select province_name from citys group by province_name ) a;

select count(distinct province_name) from citys;
//建立一個city_ex表
CREATE TABLE city_ex(  
province_code int,
province_name string,
city_code int,  
city_name string
)
ROW FORMAT DELIMITED  FIELDS
TERMINATED BY ','
LINES TERMINATED BY '\n'
LOCATION '/user/luo/city/';
//檢查四川的資料
select * from city_ex where province_name = '四川';
// 建立一個user表
CREATE TABLE user(
  uid INT,
  city_code INT,
  model string,
  access string
)
row FORMAT delimited
fields terminated by ','
lines terminated by '\n';
//載入遠端資料
load data local inpath '/home/bigdata/tanqi/hive/user.txt' into table user;
//建立分割槽表
CREATE TABLE user_daily(
  uid INT,
  city_code INT,
  model string,
  access string
)
partitioned by (p_date string);
//插入分割槽
INSERT OVERWRITE TABLE user_daily PARTITION (p_date='2017-09-01')  SELECT * FROM user;
//顯示分割槽
show partitions user_daily;
//動態插入分割槽
set hive.exec.dynamic.partition.mode=nonstrict;
insert overwrite table user_daily partition(p_date)
select *,'2017-09-02' from user union all select *,'2017-09-03' from user;
//修改分割槽
alter table user_daily partition (p_date = '2017-09-01') rename to partition (p_date = '20170901');
//刪除分割槽
alter table user_daily drop partition ( p_date = '2017-09-02');
//作業解答
select * from user where access='WIFI';

select city_code,count(access) as total from user where access = 'WIFI' group by city_code order by total desc limit 5;


select access,city_code,count(*) as total from user group by access,city_code having total>= 3 order by total desc;

select access ,count(*) as total from user where lower (model) like '%ne%' group by access order by total desc;
//各中if case when collect
//求各佔用的百分比
select sum(if(access=='WIFI' ,1,0)) / count(*) from user;
//各段號的分組統計
select
  case
    when uid % 10 in (0,1,2,3) then '0-3'
    when uid % 10 in (4,5,6,7) then '4-7'
    else '8-9'
    end as interval,
    count(*) as cnt
  from user
  group by
  case
    when uid % 10 in (0,1,2,3) then '0-3'
    when uid % 10 in (4,5,6,7) then '4-7'
    else '8-9'
  end;
//去重
select collect_set(access) from user;
//統計user表city_code最多的4個城市的access分佈情況,儲存為map
select city_code,count(*) as cnt,
  map(
    'WIFI',sum(if(access='WIFI',1,0)),
    '2G',sum(if(access='2G',1,0)),
    '3G',sum(if(access='3G',1,0)),
    '4G',sum(if(access='4G',1,0))
  ) as map_reten_acc
from user
group by city_code
order by cnt desc limit 4;
//左連線 left join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
left join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//右連線 right join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
right join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//內連線inner join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
inner join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//全連線 full join
select user.uid, user.city_code, citys.city_name
from (select * from user where uid <= 100) user
full join (select * from citys where province_code <= 30) citys
on (user.city_code = citys.city_code) limit 20;
//視窗函式
//分組topN
select access,city_code, uid
from
(
  select uid, access, city_code,
  row_number() over (partition by access order by city_code desc) as row_num
  from user
) a
where row_num = 1;
//累計和
select p_date,
  sum(cnt) over(order by p_date asc rows between unbounded preceding and current row)
from
(
  select p_date, count(*) as cnt
  from user_daily   
  where p_date between '2017-09-01' and '2017-09-30'
  group by p_date
)a
;