Hive 常用函式筆記:視窗函式、排名函式、日期函式及其他常用函式
1、視窗函式
視窗範圍限定:
OVER()
CURRENT ROW 當前行
N PRECEDING 向前N行
N FOLLOWING 向後N行
UNBOUNDED PRECEDING 起點
UNBOUNDED FOLLOWING 終點
order by[asc/desc] 有序
partition by 分組
建表:
-- Sample table with name, orderdate and cost columns; rows are parsed from a
-- comma-separated text file.
create table if not exists business (
    name      string,
    orderdate string,
    cost      int
)
row format delimited
fields terminated by ',';

-- `overwrite` replaces the current table contents with the rows of the local file.
load data local inpath '/home/atbwie/a.txt'
overwrite into table business;
資料:
資料準備:name,orderdate,cost
jack,2017-01-01,10
tony,2017-01-02,15
jack,2017-02-03,23
tony,2017-01-04,29
jack,2017-01-05,46
jack,2017-04-06,42
tony,2017-01-07,50
jack,2017-01-08,55
mart,2017-04-08,62
mart,2017-04-09,68
neil,2017-05-10,12
mart,2017-04-11,75
neil,2017-06-12,80
mart,2017-04-13,94
-- List all rows of the business table.
select *
from business;
函式:
LAG(col,n,default_value) 向前第n行,有序視窗
-- LAG: for each row, fetch orderdate from the row one position earlier within
-- the same name partition (ordered by orderdate); the third argument (null)
-- is the value used when no such row exists.
select
    *,
    lag(orderdate, 1, null) over (partition by name order by orderdate) as o1
from business;
LEAD(col,n,default_value) 向後第n行,有序視窗
-- LEAD: for each row, fetch orderdate from the row one position later within
-- the same name partition (ordered by orderdate); the third argument (null)
-- is the value used when no such row exists.
select
    *,
    lead(orderdate, 1, null) over (partition by name order by orderdate) as o1
from business;
NTILE(n) 將資料分成n組,有序視窗
-- NTILE(6): order all rows by orderdate and assign each row a bucket number
-- in the range 1..6.
select
    *,
    ntile(6) over (order by orderdate) as o1
from business;
percent_rank() 計算當前行在視窗內的相對排名百分比:(rank - 1) / (視窗總行數 - 1)
-- PERCENT_RANK: relative rank of each row when all rows are ordered by orderdate.
select
    *,
    percent_rank() over (order by orderdate) as o1
from business;
建表 載入資料
-- Score table with name, subject and score columns; rows are parsed from a
-- comma-separated text file.
-- `if not exists` keeps the script re-runnable when the table already exists.
create table if not exists score (
    name    string,
    subject string,
    score   int
)
row format delimited
fields terminated by ",";

-- `overwrite` replaces any existing rows, so re-running the script does not
-- load the sample data twice (duplicates would distort the ranking examples).
load data local inpath '/home/atbwie/b.txt'
overwrite into table score;
資料
孫悟空,數學,95
宋宋,數學,86
婷婷,數學,85
大海,數學,56
宋宋,英語,84
大海,英語,84
婷婷,英語,78
孫悟空,英語,68
大海,語文,94
孫悟空,語文,87
婷婷,語文,65
宋宋,語文,64
-- List all rows of the score table.
select *
from score;
2、排名函式
RANK() 排序相同時會重複,總數不會變
-- RANK: rank rows within each subject by score, highest first. Tied scores
-- share a rank and the ranks that follow are skipped (1, 1, 3, ...).
select
    *,
    rank() over (partition by subject order by score desc) as rp
from score;
DENSE_RANK() 排序相同時會重複,總數會減少
-- DENSE_RANK: rank rows within each subject by score, highest first. Tied
-- scores share a rank and no rank is skipped afterwards (1, 1, 2, ...).
select
    *,
    dense_rank() over (partition by subject order by score desc) as drp
from score;
ROW_NUMBER() 會根據順序計算
-- ROW_NUMBER: number rows within each subject by score, highest first.
-- Every row gets a distinct sequential number, even when scores tie.
select
    *,
    row_number() over (partition by subject order by score desc) as rmp
from score;
3、日期函式
CURRENT_DATE() 當前日期
-- Return the current date.
select current_date();
DATE_ADD(start_date,num_days) 返回開始日期後n天的日期
-- The current date plus one day.
select date_add(current_date(), 1);
DATE_SUB(start_date,num_days) 返回開始日期前n天的日期
-- The current date minus one day.
select date_sub(current_date(), 1);
DATEDIFF(enddate,startdate) 返回兩個日期的差(天數,enddate 減 startdate)
-- Number of days from 2021-01-01 to the current date.
-- The date literal must use ASCII quotes; typographic quotes are not string
-- delimiters and make the statement fail to parse.
select datediff(current_date, '2021-01-01');
4、其他常用函式
空欄位賦值:
NVL(col,default_value) 如果col為空,返回預設值
-- Person table with name, constellation and blood_type columns; rows are
-- parsed from a tab-separated text file.
create table person_info (
    name          string,
    constellation string,
    blood_type    string
)
row format delimited
fields terminated by "\t";

-- `overwrite` replaces the current table contents with the rows of the local file.
-- NOTE(review): this is the same path that is loaded into the comma-delimited
-- score table earlier in this file; confirm which data file belongs here.
load data local inpath "/home/atbwie/b.txt"
overwrite into table person_info;

-- List all rows of person_info.
select *
from person_info;
資料
孫悟空 白羊座 A
大海 射手座 A
宋宋 白羊座 B
豬八戒 白羊座 A
鳳姐 射手座 A
行轉列:
COLLECT_LIST() 可以重複
COLLECT_SET() 去重
字串切分:
SPLIT(str,regex) 以正則將給定的字串切分為Array
-- Rows to columns: the inner query builds a "constellation,blood_type" key
-- for every person; the outer query groups by that key and joins the
-- de-duplicated names of each group with '|'.
select
    t1.base,
    concat_ws('|', collect_set(t1.name)) as name
from (
    select
        name,
        concat(constellation, ",", blood_type) as base
    from person_info
) t1
group by
    t1.base;
列轉行:
EXPLODE(ARRAY)
-- Movie table with movie and category columns; rows are parsed from a
-- tab-separated text file.
create table movie_info (
    movie    string,
    category string
)
row format delimited
fields terminated by "\t";

-- Append the rows of the local file to movie_info (no `overwrite`).
load data local inpath "/opt/module/datas/movie.txt"
into table movie_info;
《疑犯追蹤》 懸疑,動作,科幻,劇情
《Lie to me》 懸疑,警匪,動作,心理,劇情
《戰狼2》 戰爭,動作,災難
-- Columns to rows: split each movie's category string on "," and emit one
-- output row per (movie, category element) via lateral view explode.
select
    m.movie,
    tbl.cate
from movie_info m
lateral view explode(split(category, ",")) tbl as cate;
UDTF:一進多出函式
UDAF:多進一出函式
UDF:一進一出函式