2、函數
阿新 • • 發佈:2018-11-15
string amp 下拉 tile cat true part 行數 style
時間函數
當前日期
select current_date(); 2018-11-14
當前時間戳
select current_timestamp(); 2018-11-14 21:35:16.237
date_format()
select date_format(current_date(),'yyyyMMdd'); 20181114
select date_format(current_timestamp(),'yyyyMMdd'); 20181114
unix_timestamp()
select unix_timestamp();
+-----------------------------------------------------------+--+
| unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss)  |
+-----------------------------------------------------------+--+
| 1542202845                                                |
+-----------------------------------------------------------+--+
from_unixtime()
select from_unixtime(unix_timestamp(),'yyyyMMdd HH:mm:ss');
+---------------------------------------------------------------------------------------------
| from_unixtime(unix_timestamp(current_timestamp(), yyyy-MM-dd HH:mm:ss), yyyyMMdd HH:mm:ss)
+---------------------------------------------------------------------------------------------
| 20181114 21:43:14
日期差值
0: jdbc:hive2://s101:10000/lx> select datediff('2019-08-02','2019-08-06');
+---------------------------------------------------------------+--+
| datediff(CAST(2019-08-02 AS DATE), CAST(2019-08-06 AS DATE))  |
+---------------------------------------------------------------+--+
| -4                                                            |
+---------------------------------------------------------------+--+
字符串函數
split
select explode(split('hello',''));
substr
select substr('hello',1,3);
trim去除前後空格
select trim(' hello ');
format_number
select format_number(1234.345,1);
concat
length
條件語句
窗口函數
lead
lead(input[, offset[, default]]) 上提:取當前行之後第 offset 行的值
input: 上提列
offset: 上提行數,可選,默認是 1 行
default: 填充值,可選,默認是null
使用:select id,name,lead(id,2,'qq')over(partition by id order by id) lad from www;
註意:2可選,不寫默認1;'qq'可選,不寫默認null,類型需要對應,此處id是int,而'qq'是字符串,所以填充結果還是為null
partition by id:可選,分組後對每個組進行lead
order by id:必須寫
lag
lag(input[, offset[, default]]) 下拉:取當前行之前第 offset 行的值
input: 下拉列
offset: 下拉行數,可選,默認是 1 行
default: 填充值,可選,默認是null
使用:select id,name,lag(id,2,11)over(partition by id order by id) lag from www;
註意:2可選,不寫默認1;11可選,不寫默認null
partition by id:可選,分組後對每個組進行lag
order by id:必須寫
first_value
first_value(expr[, isIgnoreNull]) expr:列名或一個表達式 isIgnoreNull:true或false,如果是true將跳過null值,可選,默認false select id,name,first_value(concat(cast(id as string), name),true)over(partition by name order by id) lag from www;
last_value
select id,name,last_value(concat(cast(id as string), name),true)over(order by id ) lag from www;
+-----+-------+--------+--+
| id  | name  | lag    |
+-----+-------+--------+--+
| 1   | a     | 1a     |
| 2   | b     | 2b     |
| 3   | c     | 3c     |
| 4   | c     | 4c     |
| 5   | c     | 5c     |
| 6   | d     | 6d     |
| 7   | b     | 7b     |
| 8   | a     | 8a     |
| 9   | a     | 9a     |
| 12  | eee   | 12eee  |
+-----+-------+--------+--+
select id,name from www;
+-----+-------+--+
| id  | name  |
+-----+-------+--+
| 12  | eee   |
| 1   | a     |
| 2   | b     |
| 3   | c     |
| 4   | c     |
| 5   | c     |
| 6   | d     |
| 7   | b     |
| 8   | a     |
| 9   | a     |
+-----+-------+--+
select id,name,last_value(concat(cast(id as string), name),true)over() lag from www;
+-----+-------+------+--+
| id  | name  | lag  |
+-----+-------+------+--+
| 12  | eee   | 6d   |
| 7   | b     | 6d   |
| 8   | a     | 6d   |
| 9   | a     | 6d   |
| 1   | a     | 6d   |
| 2   | b     | 6d   |
| 3   | c     | 6d   |
| 4   | c     | 6d   |
| 5   | c     | 6d   |
| 6   | d     | 6d   |
+-----+-------+------+--+
說明:over中帶order by時,默認窗口是從分區第一行到當前行,所以last_value取到的是當前行的值;over()不帶order by時,默認窗口是整個分區,所以每一行取到的都是分區最後一行的值。
over和標準聚合函數
select distinct name,count(name)over(partition by name) s from www; 求每個分區內name的個數並去重
select distinct name,sum(id)over(partition by name) s from www; 求每個分區的id之和並去重
select id,name,max(length(name))over() from www; 總體的最大長度
select name,min(id)over(partition by name) s from www; 每個分區最小id
select name,max(id)over(partition by name) s from www; 每個分區最大id
select name,avg(id)over(partition by name) s from www; 每個分區平均id
over和partition by
over和partition by order by
select first_value(id)over(partition by id,name) from www;
select first_value(id)over(partition by id,name order by id,name) from www;
分析函數
rank
row_number
dense_rank
cume_dist
percent_rank
ntile
2、函數