Hive Sum MAX MIN聚合函數
阿新 • • 發佈:2018-11-23
format 子句 結果 part ren txt avg home 表示 數據準備
cookie1,2015-04-10,1
cookie1,2015-04-11,5
cookie1,2015-04-12,7
cookie1,2015-04-13,3
cookie1,2015-04-14,2
cookie1,2015-04-15,4
cookie1,2015-04-16,4
創建數據庫及表
create database if not exists cookie;
use cookie;
drop table if exists cookie1;
create table cookie1(cookieid string, createtime string, pv int) row format delimited fields terminated by ‘,‘;
load data local inpath "/home/hadoop/cookie1.txt" into table cookie1;
select * from cookie1;
SUM函數
select
cookieid,
createtime,
pv,
sum(pv) over (partition by cookieid order by createtime rows between unbounded preceding and current row) as pv1,
sum(pv) over (partition by cookieid order by createtime) as pv2,
sum(pv) over (partition by cookieid) as pv3,
sum(pv) over (partition by cookieid order by createtime rows between 3 preceding and current row) as pv4,
sum(pv) over (partition by cookieid order by createtime rows between 3 preceding and 1 following) as pv5,
sum(pv) over (partition by cookieid order by createtime rows between current row and unbounded following) as pv6
from cookie1;
解釋
pv1: 分組內從起點到當前行的pv累積,如,11號的pv1=10號的pv+11號的pv, 12號=10號+11號+12號
pv2: 同pv1
pv3: 分組內(cookie1)所有的pv累加
pv4: 分組內當前行+往前3行,如,11號=10號+11號, 12號=10號+11號+12號, 13號=10號+11號+12號+13號, 14號=11號+12號+13號+14號
pv5: 分組內當前行+往前3行+往後1行,如,14號=11號+12號+13號+14號+15號=5+7+3+2+4=21
pv6: 分組內當前行+往後所有行,如,13號=13號+14號+15號+16號=3+2+4+4=13,14號=14號+15號+16號=2+4+4=10
關鍵字解釋
如果不指定ROWS BETWEEN,默認為從起點到當前行;
如果不指定ORDER BY,則將分組內所有值累加;
關鍵是理解ROWS BETWEEN含義,也叫做WINDOW子句:
PRECEDING:往前
FOLLOWING:往後
CURRENT ROW:當前行
UNBOUNDED:起點,
UNBOUNDED PRECEDING 表示從前面的起點,
UNBOUNDED FOLLOWING:表示到後面的終點
–其他AVG,MIN,MAX,和SUM用法一樣。
AVG函數
select
cookieid,
createtime,
pv,
avg(pv) over (partition by cookieid order by createtime rows between unbounded preceding and current row) as pv1, -- 默認為從起點到當前行
avg(pv) over (partition by cookieid order by createtime) as pv2, --從起點到當前行,結果同pv1
avg(pv) over (partition by cookieid) as pv3, --分組內所有行
avg(pv) over (partition by cookieid order by createtime rows between 3 preceding and current row) as pv4, --當前行+往前3行
avg(pv) over (partition by cookieid order by createtime rows between 3 preceding and 1 following) as pv5, --當前行+往前3行+往後1行
avg(pv) over (partition by cookieid order by createtime rows between current row and unbounded following) as pv6 --當前行+往後所有行
from cookie1;
cookie1,2015-04-10,1
cookie1,2015-04-11,5
cookie1,2015-04-12,7
cookie1,2015-04-13,3
cookie1,2015-04-14,2
cookie1,2015-04-15,4
cookie1,2015-04-16,4
創建數據庫及表
create database if not exists cookie;
use cookie;
drop table if exists cookie1;
create table cookie1(cookieid string, createtime string, pv int) row format delimited fields terminated by ‘,‘;
select * from cookie1;
SUM函數
select
cookieid,
createtime,
pv,
sum(pv) over (partition by cookieid order by createtime rows between unbounded preceding and current row) as pv1,
sum(pv) over (partition by cookieid order by createtime) as pv2,
sum(pv) over (partition by cookieid order by createtime rows between 3 preceding and current row) as pv4,
sum(pv) over (partition by cookieid order by createtime rows between 3 preceding and 1 following) as pv5,
sum(pv) over (partition by cookieid order by createtime rows between current row and unbounded following) as pv6
解釋
pv1: 分組內從起點到當前行的pv累積,如,11號的pv1=10號的pv+11號的pv, 12號=10號+11號+12號
pv2: 同pv1
pv3: 分組內(cookie1)所有的pv累加
pv4: 分組內當前行+往前3行,如,11號=10號+11號, 12號=10號+11號+12號, 13號=10號+11號+12號+13號, 14號=11號+12號+13號+14號
pv5: 分組內當前行+往前3行+往後1行,如,14號=11號+12號+13號+14號+15號=5+7+3+2+4=21
pv6: 分組內當前行+往後所有行,如,13號=13號+14號+15號+16號=3+2+4+4=13,14號=14號+15號+16號=2+4+4=10
關鍵字解釋
如果不指定ROWS BETWEEN,默認為從起點到當前行;
如果不指定ORDER BY,則將分組內所有值累加;
關鍵是理解ROWS BETWEEN含義,也叫做WINDOW子句:
PRECEDING:往前
FOLLOWING:往後
CURRENT ROW:當前行
UNBOUNDED:起點,
UNBOUNDED PRECEDING 表示從前面的起點,
UNBOUNDED FOLLOWING:表示到後面的終點
–其他AVG,MIN,MAX,和SUM用法一樣。
AVG函數
select
cookieid,
createtime,
pv,
avg(pv) over (partition by cookieid order by createtime rows between unbounded preceding and current row) as pv1, -- 默認為從起點到當前行
avg(pv) over (partition by cookieid order by createtime) as pv2, --從起點到當前行,結果同pv1
avg(pv) over (partition by cookieid) as pv3, --分組內所有行
avg(pv) over (partition by cookieid order by createtime rows between 3 preceding and current row) as pv4, --當前行+往前3行
avg(pv) over (partition by cookieid order by createtime rows between 3 preceding and 1 following) as pv5, --當前行+往前3行+往後1行
avg(pv) over (partition by cookieid order by createtime rows between current row and unbounded following) as pv6 --當前行+往後所有行
from cookie1;
Hive Sum MAX MIN聚合函數