hive內建函式大全
阿新 • • 發佈:2019-02-14
一、檢視函式
show functions 顯示hive下內建所有函式
desc function extended add_months 顯示add_months 函式用法
二、按首字母排序
hive版本1.1.0-cdh5.7.0.
函式 | Usage | 舉例 |
! | ! a - Logical not | No example for !. |
% | a % b - Returns the remainder when dividing a by b. | No example for %. |
& | a & b - Bitwise AND. | SELECT 3 & 5; 1 |
* | a * b - Multiplies a by b. | No example for *. |
+ | a + b - Returns a+b. | No example for +. |
- | a - b - Returns a-b. | No example for -. |
/ | a / b - Divides a by b. | > SELECT 3 / 2; 1.5 |
< | a < b - Returns TRUE if a is less than b. | No example for <. |
<= | a <= b - Returns TRUE if a is not greater than b. | No example for <=. |
<=> | a <=> b - Returns same result with EQUAL(=) operator for non-null operands, but returns TRUE if both are NULL, FALSE if one of the them is NULL. | No example for <=>. |
= | a = b - Returns TRUE if a equals b and false otherwise. | No example for =. |
== | a == b - Returns TRUE if a equals b and false otherwise. | No example for ==. |
> | a > b - Returns TRUE if a is greater than b. | No example for >. |
>= | a >= b - Returns TRUE if a is not smaller than b. | No example for >=. |
^ | a ^ b - Bitwise exclusive OR. | SELECT 3 ^ 5; 6 |
abs | abs(expr) - Returns the absolute value of the numeric value | > SELECT abs('-1'); 1 |
acos | acos(x) - Returns the arc cosine of x if -1<=x<=1 or NaN otherwise. | > SELECT acos(1); 0.0 > SELECT acos(2); NaN |
add_months | add_months(start_date, num_months) - Returns the date that is num_months after start_date. | > SELECT add_months('2016-08-31', 1); '2016-09-30' |
and | a and b - Logical AND. | No example for and. |
approx_count_distinct | approx_count_distinct(expr) - Returns the estimated cardinality by HyperLogLog++. approx_count_distinct(expr, relativeSD=0.05) - Returns the estimated cardinality by HyperLogLog++ with relativeSD, the maximum estimation error allowed. | No example for approx_count_distinct. |
array | array(n0, ...) - Returns an array with the given elements. | No example for array. |
array_contains | array_contains(array, value) - Returns TRUE if the array contains the value. | > SELECT array_contains(array(1, 2, 3), 2); true |
ascii | ascii(str) - Returns the numeric value of the first character of str. | > SELECT ascii('222'); 50 > SELECT ascii(2); 50 |
asin | asin(x) - Returns the arc sin of x if -1<=x<=1 or NaN otherwise. | > SELECT asin(0); 0.0 > SELECT asin(2); NaN |
assert_true | assert_true(condition) - Throw an exception if 'condition' is not true. | No example for assert_true. |
atan | atan(x) - Returns the arc tangent. | > SELECT atan(0); 0.0 |
atan2 | atan2(x,y) - Returns the arc tangent2. | > SELECT atan2(0, 0); 0.0 |
avg | avg(x) - Returns the mean calculated from values of a group. | No example for avg. |
base64 | base64(bin) - Convert the argument from binary to a base 64 string. | No example for base64. |
bin | bin(x) - Returns x in binary. | > SELECT bin(13); '1101' |
bround | bround(x, d) - Round x to d decimal places using HALF_EVEN rounding mode. | > SELECT bround(2.5, 0); 2.0 |
cbrt | cbrt(x) - Returns the cube root of a double value. | > SELECT cbrt(27.0); 3.0 |
ceil | ceil(x) - Returns the smallest integer not smaller than x. | > SELECT ceil(-0.1); 0 > SELECT ceil(5); 5 |
ceiling | ceiling(x) - Returns the smallest integer not smaller than x. | > SELECT ceiling(-0.1); 0 > SELECT ceiling(5); 5 |
coalesce | coalesce(a1, a2, ...) - Returns the first non-null argument if exists. Otherwise, NULL. | SELECT coalesce(NULL, 1, NULL); 1 |
collect_list | collect_list(expr) - Collects and returns a list of non-unique elements. | No example for collect_list. |
collect_set | collect_set(expr) - Collects and returns a set of unique elements. | No example for collect_set. |
concat | concat(str1, str2, ..., strN) - Returns the concatenation of str1, str2, ..., strN | SELECT concat('Spark','SQL'); 'SparkSQL' |
concat_ws | concat_ws(sep, [str | array(str)]+) - Returns the concatenation of the strings separated by sep. | > SELECT concat_ws(' ', 'Spark', 'SQL'); 'Spark SQL' |
conv | conv(num, from_base, to_base) - Convert num from from_base to to_base. | > SELECT conv('100', 2, 10); '4' > SELECT conv(-10, 16, -10); '-16' |
corr | corr(x,y) - Returns Pearson coefficient of correlation between a set of number pairs. | No example for corr. |
cos | cos(x) - Returns the cosine of x. | > SELECT cos(0); 1.0 |
cosh | cosh(x) - Returns the hyperbolic cosine of x. | > SELECT cosh(0); 1.0 |
count | count(*) - Returns the total number of retrieved rows, including rows containing NULL values. count(expr) - Returns the number of rows for which the supplied expression is non-NULL. count(DISTINCT expr[, expr...]) - Returns the number of rows for which the supplied expression(s) are unique and non-NULL | No example for count. |
covar_pop | covar_pop(x,y) - Returns the population covariance of a set of number pairs. | No example for covar_pop. |
covar_samp | covar_samp(x,y) - Returns the sample covariance of a set of number pairs. | No example for covar_samp. |
crc32 | crc32(input) - Returns a cyclic redundancy check value as a bigint of the input | > SELECT crc32('Spark'); '1557323817' |
cube | N/A. | N/A. |
cume_dist | cume_dist() - The CUME_DIST() function computes the position of a value relative to all values in the partition. | No example for cume_dist. |
current_database | current_database() - Returns the current database. | > SELECT current_database() |
current_date | current_date() - Returns the current date at the start of query evaluation. | No example for current_date. |
current_timestamp | current_timestamp() - Returns the current timestamp at the start of query evaluation. | No example for current_timestamp. |
date_add | date_add(start_date, num_days) - Returns the date that is num_days after start_date. | > SELECT date_add('2016-07-30', 1); '2016-07-31' |
date_format | date_format(date/timestamp/string, fmt) - Converts a date/timestamp/string to a value of string in the format specified by the date format fmt. | > SELECT date_format('2016-04-08', 'y'); '2016' |
date_sub | date_sub(start_date, num_days) - Returns the date that is num_days before start_date. | > SELECT date_sub('2016-07-30', 1); '2016-07-29' |
datediff | datediff(date1, date2) - Returns the number of days from date2 to date1 (date1 minus date2). | > SELECT datediff('2009-07-31', '2009-07-30'); 1 |
day | day(param) - Returns the day of month of date/timestamp, or the day of interval. | > SELECT day('2009-07-30'); 30 |
dayofmonth | dayofmonth(param) - Returns the day of month of date/timestamp, or the day of interval. | > SELECT dayofmonth('2009-07-30'); 30 |
dayofyear | dayofyear(param) - Returns the day of year of date/timestamp. | > SELECT dayofyear('2016-04-09'); 100 |
decode | decode(bin, str) - Decode the first argument using the second argument character set. | No example for decode. |
degrees | degrees(x) - Converts radians to degrees. | > SELECT degrees(3.141592653589793); 180.0 |
dense_rank | dense_rank() - The DENSE_RANK() function computes the rank of a value in a group of values. The result is one plus the previously assigned rank value. Unlike Rank, DenseRank will not produce gaps in the ranking sequence. | No example for dense_rank. |
e | e() - Returns Euler's number, E. | > SELECT e(); 2.718281828459045 |
elt | elt(n, str1, str2, ...) - returns the n-th string, e.g. returns str2 when n is 2 | > SELECT elt(1, 'scala', 'java') FROM src LIMIT 1; 'scala' |
encode | encode(str, str) - Encode the first argument using the second argument character set | No example for encode. |
exp | exp(x) - Returns e to the power of x. | > SELECT exp(0); 1.0 |
explode | explode(a) - Separates the elements of array a into multiple rows, or the elements of map a into multiple rows and columns. Extended Usage: > SELECT explode(array(10,20)); 10 20 | > SELECT explode(array(10,20)); 10 20 |
expm1 | expm1(x) - Returns exp(x) - 1. | > SELECT expm1(0); 0.0 |
factorial | factorial(n) - Returns n factorial for n in [0..20]. Otherwise, NULL. | > SELECT factorial(5); 120 |
find_in_set | find_in_set(str, str_array) - Returns the index (1-based) of the given string (left) in the comma-delimited list (right). Returns 0, if the string wasn't found or if the given string (left) contains a comma. | > SELECT find_in_set('ab','abc,b,ab,c,def'); 3 |
first | first(expr) - Returns the first value of `child` for a group of rows. first(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows. If isIgnoreNull is true, returns only non-null values. | No example for first. |
first_value | first_value(expr) - Returns the first value of `child` for a group of rows. first_value(expr,isIgnoreNull=false) - Returns the first value of `child` for a group of rows. If isIgnoreNull is true, returns only non-null values. | No example for first_value. |
floor | floor(x) - Returns the largest integer not greater than x. | > SELECT floor(-0.1); -1 > SELECT floor(5); 5 |
format_number | format_number(X, D) - Formats the number X like '#,###,###.##', rounded to D decimal places. If D is 0, the result has no decimal point or fractional part. This is supposed to function like MySQL's FORMAT. | > SELECT format_number(12332.123456, 4); '12,332.1235' |
format_string | format_string(String format, Obj... args) - Returns a formatted string from printf-style format strings. | > SELECT format_string("Hello World %d %s", 100, "days"); 'Hello World 100 days' |
from_unixtime | from_unixtime(unix_time, format) - Returns unix_time in the specified format | > SELECT from_unixtime(0, 'yyyy-MM-dd HH:mm:ss'); '1970-01-01 00:00:00' |
from_utc_timestamp | from_utc_timestamp(timestamp, string timezone) - Assumes given timestamp is UTC and converts to given timezone. | No example for from_utc_timestamp. |
get_json_object | get_json_object(json_txt, path) - Extract a json object from path | No example for get_json_object. |
greatest | greatest(n1, ...) - Returns the greatest value of all parameters, skipping null values. | No example for greatest. |
grouping | N/A. | N/A. |
grouping_id | N/A. | N/A. |
hash | hash(a1, a2, ...) - Returns a hash value of the arguments. | No example for hash. |
hex | hex(x) - Convert the argument to hexadecimal. | > SELECT hex(17); '11' > SELECT hex('Spark SQL'); '537061726B2053514C' |
hour | hour(param) - Returns the hour component of the string/timestamp/interval. | > SELECT hour('2009-07-30 12:58:59'); 12 |
hypot | hypot(a, b) - Returns sqrt(a**2 + b**2). | > SELECT hypot(3, 4); 5.0 |
if | if(expr1,expr2,expr3) - If expr1 is TRUE then IF() returns expr2; otherwise it returns expr3. | No example for if. |
ifnull | ifnull(a,b) - Returns b if a is null, or a otherwise. | No example for ifnull. |
in | expr in(val1, val2, ...) - Returns true if expr equals to any valN. | No example for in. |
initcap | initcap(str) - Returns str with the first letter of each word in uppercase. All other letters are in lowercase. Words are delimited by white space. | > SELECT initcap('sPark sql'); 'Spark Sql' |
inline | inline(a) - Explodes an array of structs into a table. | > SELECT inline(array(struct(1, 'a'), struct(2, 'b'))); [1,a] [2,b] |
input_file_name | input_file_name() - Returns the name of the current file being read if available | > SELECT input_file_name(); '' |
instr | instr(str, substr) - Returns the (1-based) index of the first occurrence of substr in str. | > SELECT instr('SparkSQL', 'SQL'); 6 |
isnan | isnan(a) - Returns true if a is NaN and false otherwise. | No example for isnan. |
isnotnull | isnotnull(a) - Returns true if a is not NULL and false otherwise. | No example for isnotnull. |
isnull | isnull(a) - Returns true if a is NULL and false otherwise. | No example for isnull. |
json_tuple | json_tuple(jsonStr, p1, p2, ..., pn) - like get_json_object, but it takes multiple names and return a tuple. All the input parameters and output column types are string. | No example for json_tuple. |
kurtosis | kurtosis(x) - Returns the Kurtosis value calculated from values of a group. | No example for kurtosis. |
lag | lag(input, offset, default) - LAG returns the value of 'input' at 'offset' rows before the current row in the window | No example for lag. |
last | last(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows. | No example for last. |
last_day | last_day(date) - Returns the last day of the month which the date belongs to. | > SELECT last_day('2009-01-12'); '2009-01-31' |
last_value | last_value(expr,isIgnoreNull) - Returns the last value of `child` for a group of rows. | No example for last_value. |
lcase | lcase(str) - Returns str with all characters changed to lowercase | > SELECT lcase('SparkSql'); 'sparksql' |
lead | lead(input, offset, default) - LEAD returns the value of 'input' at 'offset' rows after the current row in the window | No example for lead. |
least | least(n1, ...) - Returns the least value of all parameters, skipping null values. | No example for least. |
length | length(str | binary) - Returns the length of str or number of bytes in binary data. | > SELECT length('Spark SQL'); 9 |
levenshtein | levenshtein(str1, str2) - Returns the Levenshtein distance between the two given strings. | > SELECT levenshtein('kitten', 'sitting'); 3 |
like | str like pattern - Returns true if str matches pattern and false otherwise. | No example for like. |
ln | ln(x) - Returns the natural logarithm of x with base e. | > SELECT ln(1); 0.0 |
locate | locate(substr, str[, pos]) - Returns the position of the first occurrence of substr in str after position pos. The given pos and return value are 1-based. | > SELECT locate('bar', 'foobarbar', 5); 7 |
log | log(b, x) - Returns the logarithm of x with base b. | > SELECT log(10, 100); 2.0 |
log10 | log10(x) - Returns the logarithm of x with base 10. | > SELECT log10(10); 1.0 |
log1p | log1p(x) - Returns log(1 + x). | > SELECT log1p(0); 0.0 |
log2 | log2(x) - Returns the logarithm of x with base 2. | > SELECT log2(2); 1.0 |
lower | lower(str) - Returns str with all characters changed to lowercase | > SELECT lower('SparkSql'); 'sparksql' |
lpad | lpad(str, len, pad) - Returns str, left-padded with pad to a length of len. If str is longer than len, the return value is shortened to len characters. | > SELECT lpad('hi', 5, '??'); '???hi' > SELECT lpad('hi', 1, '??'); 'h' |
ltrim | ltrim(str) - Removes the leading space characters from str. | > SELECT ltrim(' SparkSQL '); 'SparkSQL ' |
map | map(key0, value0, key1, value1...) - Creates a map with the given key/value pairs. | No example for map. |
map_keys | map_keys(map) - Returns an unordered array containing the keys of the map. | > SELECT map_keys(map(1, 'a', 2, 'b')); [1,2] |
map_values | map_values(map) - Returns an unordered array containing the values of the map. | > SELECT map_values(map(1, 'a', 2, 'b')); ["a","b"] |
max | max(expr) - Returns the maximum value of expr. | No example for max. |
md5 | md5(input) - Returns an MD5 128-bit checksum as a hex string of the input | > SELECT md5('Spark'); '8cde774d6f7333752ed72cacddb05126' |
mean | mean(x) - Returns the mean calculated from values of a group. | No example for mean. |
min | min(expr) - Returns the minimum value of expr. | No example for min. |
minute | minute(param) - Returns the minute component of the string/timestamp/interval. | > SELECT minute('2009-07-30 12:58:59'); 58 |
monotonically_increasing_id | monotonically_increasing_id() - Returns monotonically increasing 64-bit integers. The generated ID is guaranteed to be monotonically increasing and unique, but not consecutive. The current implementation puts the partition ID in the upper 31 bits, and the lower 33 bits represent the record number within each partition. The assumption is that the data frame has less than 1 billion partitions, and each partition has less than 8 billion records. | > SELECT monotonically_increasing_id(); 0 |
month | month(param) - Returns the month component of the date/timestamp/interval | > SELECT month('2016-07-30'); 7 |
months_between | months_between(date1, date2) - returns number of months between dates date1 and date2. | > SELECT months_between('1997-02-28 10:30:00', '1996-10-30'); 3.94959677 |
named_struct | named_struct(name1, val1, name2, val2, ...) - Creates a struct with the given field names and values. | No example for named_struct. |
nanvl | nanvl(a,b) - Returns a iff it's not NaN, or b otherwise. | No example for nanvl. |
negative | negative(a) - Returns -a. | No example for negative. |
next_day | next_day(start_date, day_of_week) - Returns the first date which is later than start_date and named as indicated. | > SELECT next_day('2015-01-14', 'TU'); '2015-01-20' |
not | not a - Logical not | No example for not. |
now | now() - Returns the current timestamp at the start of query evaluation. | > SELECT now(); 1521529756611 |
ntile | ntile(x) - The NTILE(n) function divides the rows for each window partition into 'n' buckets ranging from 1 to at most 'n'. | No example for ntile. |
nullif | nullif(a,b) - Returns null if a equals to b, or a otherwise. | No example for nullif. |
nvl | nvl(a,b) - Returns b if a is null, or a otherwise. | No example for nvl. |
nvl2 | nvl2(a,b,c) - Returns b if a is not null, or c otherwise. | No example for nvl2. |
or | a or b - Logical OR. | No example for or. |
parse_url | parse_url(url, partToExtract[, key]) - extracts a part from a URL | Parts: HOST, PATH, QUERY, REF, PROTOCOL, AUTHORITY, FILE, USERINFO. Key specifies which query to extract. Examples: > SELECT parse_url('http://spark.apache.org/path?query=1', 'HOST') 'spark.apache.org' > SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY') 'query=1' > SELECT parse_url('http://spark.apache.org/path?query=1', 'QUERY', 'query') '1' |
percent_rank | percent_rank() - PERCENT_RANK() The PercentRank function computes the percentage ranking of a value in a group of values. | No example for percent_rank. |
pi | pi() - Returns PI. | > SELECT pi(); 3.141592653589793 |
pmod | pmod(a, b) - Returns the positive modulo | > SELECT pmod(10,3); 1 |
posexplode | posexplode(a) - Separates the elements of array a into multiple rows with positions, or the elements of a map into multiple rows and columns with positions. Extended Usage: > SELECT posexplode(array(10,20)); 0 10 1 20 | > SELECT posexplode(array(10,20)); 0 10 1 20 |
positive | positive(a) - Returns a. | No example for positive. |
pow | pow(x1, x2) - Raise x1 to the power of x2. | > SELECT pow(2, 3); 8.0 |
power | power(x1, x2) - Raise x1 to the power of x2. | > SELECT power(2, 3); 8.0 |
printf | printf(String format, Obj... args) - Returns a formatted string from printf-style format strings. | > SELECT printf("Hello World %d %s", 100, "days"); 'Hello World 100 days' |
quarter | quarter(param) - Returns the quarter of the year for date, in the range 1 to 4. | No example for quarter. |
radians | radians(x) - Converts degrees to radians. | > SELECT radians(180); 3.141592653589793 |
rand | rand(a) - Returns a random column with i.i.d. uniformly distributed values in [0, 1). | No example for rand. |
randn | randn(a) - Returns a random column with i.i.d. gaussian random distribution. | No example for randn. |
rank | rank() - RANK() computes the rank of a value in a group of values. The result is one plus the number of rows preceding or equal to the current row in the ordering of the partition. Tie values will produce gaps in the sequence. | No example for rank. |
regexp_extract | regexp_extract(str, regexp[, idx]) - extracts a group that matches regexp. | > SELECT regexp_extract('100-200', '(\d+)-(\d+)', 1); '100' |
regexp_replace | regexp_replace(str, regexp, rep) - replace all substrings of str that match regexp with rep. | > SELECT regexp_replace('100-200', '(\d+)', 'num'); 'num-num' |
repeat | repeat(str, n) - Returns the string which repeat the given string value n times. | > SELECT repeat('123', 2); '123123' |
reverse | reverse(str) - Returns the reversed given string. | > SELECT reverse('Spark SQL'); 'LQS krapS' |
rint | rint(x, d) - Return the rounded x at d decimal places. | > SELECT rint(12.3456, 1); 12.3 |
rlike | str rlike regexp - Returns true if str matches regexp and false otherwise. | No example for rlike. |
rollup | N/A. | N/A. |
round | round(x, d) - Round x to d decimal places using HALF_UP rounding mode. | > SELECT round(2.5, 0); 3.0 |
row_number | row_number() - The ROW_NUMBER() function assigns a unique, sequential number to each row, starting with one, according to the ordering of rows within the window partition. | No example for row_number. |
rpad | rpad(str, len, pad) - Returns str, right-padded with pad to a length of len. If str is longer than len, the return value is shortened to len characters. | > SELECT rpad('hi', 5, '??'); 'hi???' > SELECT rpad('hi', 1, '??'); 'h' |
rtrim | rtrim(str) - Removes the trailing space characters from str. | > SELECT rtrim(' SparkSQL '); ' SparkSQL' |
second | second(param) - Returns the second component of the string/timestamp/interval. | > SELECT second('2009-07-30 12:58:59'); 59 |
sentences | sentences(str[, lang, country]) - Splits str into an array of array of words. | > SELECT sentences('Hi there! Good morning.'); [['Hi','there'], ['Good','morning']] |
sha | sha(input) - Returns a sha1 hash value as a hex string of the input | > SELECT sha('Spark'); '85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c' |
sha1 | sha1(input) - Returns a sha1 hash value as a hex string of the input | > SELECT sha1('Spark'); '85f5955f4b27a9a4c2aab6ffe5d7189fc298b92c' |
sha2 | sha2(input, bitLength) - Returns a checksum of SHA-2 family as a hex string of the input. SHA-224, SHA-256, SHA-384, and SHA-512 are supported. Bit length of 0 is equivalent to 256. | > SELECT sha2('Spark', 0); '529bc3b07127ecb7e53a4dcf1991d9152c24537d919178022b2c42657f79a26b' |
shiftleft | shiftleft(a, b) - Bitwise left shift. | > SELECT shiftleft(2, 1); 4 |
shiftright | shiftright(a, b) - Bitwise right shift. | > SELECT shiftright(4, 1); 2 |
shiftrightunsigned | shiftrightunsigned(a, b) - Bitwise unsigned right shift. | > SELECT shiftrightunsigned(4, 1); 2 |
sign | sign(x) - Returns the sign of x. | > SELECT sign(40); 1.0 |
signum | signum(x) - Returns the sign of x. | > SELECT signum(40); 1.0 |
sin | sin(x) - Returns the sine of x. | > SELECT sin(0); 0.0 |
sinh | sinh(x) - Returns the hyperbolic sine of x. | > SELECT sinh(0); 0.0 |
size | size(expr) - Returns the size of an array or a map. | > SELECT size(array('b', 'd', 'c', 'a')); 4 |
skewness | skewness(x) - Returns the Skewness value calculated from values of a group. | No example for skewness. |
sort_array | sort_array(array(obj1, obj2, ...), ascendingOrder) - Sorts the input array in ascending order according to the natural ordering of the array elements. Extended Usage: > SELECT sort_array(array('b', 'd', 'c', 'a'), true); 'a', 'b', 'c', 'd' | > SELECT sort_array(array('b', 'd', 'c', 'a'), true); 'a', 'b', 'c', 'd' |
soundex | soundex(str) - Returns soundex code of the string. | > SELECT soundex('Miller'); 'M460' |
space | space(n) - Returns a string consisting of n spaces. | > SELECT space(2); '  ' |
spark_partition_id | spark_partition_id() - Returns the current partition id of the Spark task | > SELECT spark_partition_id(); 0 |
split | split(str, regex) - Splits str around occurrences that match regex | > SELECT split('oneAtwoBthreeC', '[ABC]'); ['one', 'two', 'three'] |
sqrt | sqrt(x) - Returns the square root of x. | > SELECT sqrt(4); 2.0 |
stack | stack(n, v1, ..., vk) - Separate v1, ..., vk into n rows. | > SELECT stack(2, 1, 2, 3); [1,2] [3,null] |
std | std(x) - Returns the sample standard deviation calculated from values of a group. | No example for std. |
stddev | stddev(x) - Returns the sample standard deviation calculated from values of a group. | No example for stddev. |
stddev_pop | stddev_pop(x) - Returns the population standard deviation calculated from values of a group. | No example for stddev_pop. |
stddev_samp | stddev_samp(x) - Returns the sample standard deviation calculated from values of a group. | No example for stddev_samp. |
struct | struct(col1, col2, col3, ...) - Creates a struct with the given field values. | No example for struct. |
substr | substr(str, pos[, len]) - Returns the substring of str that starts at pos and is of length len or the slice of byte array that starts at pos and is of length len. | > SELECT substr('Spark SQL', 5); 'k SQL' > SELECT substr('Spark SQL', -3); 'SQL' > SELECT substr('Spark SQL', 5, 1); 'k' |
substring | substring(str, pos[, len]) - Returns the substring of str that starts at pos and is of length len or the slice of byte array that starts at pos and is of length len. | > SELECT substring('Spark SQL', 5); 'k SQL' > SELECT substring('Spark SQL', -3); 'SQL' > SELECT substring('Spark SQL', 5, 1); 'k' |
substring_index | substring_index(str, delim, count) - Returns the substring from str before count occurrences of the delimiter delim. If count is positive, everything to the left of the final delimiter (counting from the left) is returned. If count is negative, everything to the right of the final delimiter (counting from the right) is returned. Substring_index performs a case-sensitive match when searching for delim. | SELECT substring_index('www.apache.org', '.', 2); 'www.apache' |
sum | sum(x) - Returns the sum calculated from values of a group. | No example for sum. |
tan | tan(x) - Returns the tangent of x. | > SELECT tan(0); 0.0 |
tanh | tanh(x) - Returns the hyperbolic tangent of x. | > SELECT tanh(0); 0.0 |
to_date | to_date(expr) - Extracts the date part of the date or datetime expression expr. | > SELECT to_date('2009-07-30 04:17:52'); '2009-07-30' |
to_unix_timestamp | to_unix_timestamp(date[, pattern]) - Returns the UNIX timestamp of the given time. | No example for to_unix_timestamp. |
to_utc_timestamp | to_utc_timestamp(timestamp, string timezone) - Assumes given timestamp is in given timezone and converts to UTC. | No example for to_utc_timestamp. |
translate | translate(input, from, to) - Translates the input string by replacing the characters present in the from string with the corresponding characters in the to string | > SELECT translate('AaBbCc', 'abc', '123'); 'A1B2C3' |
trim | trim(str) - Removes the leading and trailing space characters from str. | > SELECT trim(' SparkSQL '); 'SparkSQL' |
trunc | trunc(date, fmt) - Returns date with the time portion of the day truncated to the unit specified by the format model fmt. | > SELECT trunc('2009-02-12', 'MM'); '2009-02-01' > SELECT trunc('2015-10-27', 'YEAR'); '2015-01-01' |
ucase | ucase(str) - Returns str with all characters changed to uppercase | > SELECT ucase('SparkSql'); 'SPARKSQL' |
unbase64 | unbase64(str) - Convert the argument from a base 64 string to binary. | No example for unbase64. |
unhex | unhex(x) - Converts hexadecimal argument to binary. | > SELECT decode(unhex('537061726B2053514C'),'UTF-8'); 'Spark SQL' |
unix_timestamp | unix_timestamp([date[, pattern]]) - Returns the UNIX timestamp of current or specified time. | No example for unix_timestamp. |
upper | upper(str) - Returns str with all characters changed to uppercase | > SELECT upper('SparkSql'); 'SPARKSQL' |
var_pop | var_pop(x) - Returns the population variance calculated from values of a group. | No example for var_pop. |
var_samp | var_samp(x) - Returns the sample variance calculated from values of a group. | No example for var_samp. |
variance | variance(x) - Returns the sample variance calculated from values of a group. | No example for variance. |
weekofyear | weekofyear(param) - Returns the week of the year of the given date. | > SELECT weekofyear('2008-02-20'); 8 |
when | CASE WHEN a THEN b [WHEN c THEN d]* [ELSE e] END - When a = true, returns b; when c = true, return d; else return e. | No example for when. |
window | N/A. | N/A. |
xpath | xpath(xml, xpath) - Returns a string array of values within xml nodes that match the xpath expression | > SELECT xpath('<a><b>b1</b><b>b2</b><b>b3</b><c>c1</c><c>c2</c></a>','a/b/text()'); ['b1','b2','b3'] |
xpath_boolean | xpath_boolean(xml, xpath) - Evaluates a boolean xpath expression. | > SELECT xpath_boolean('<a><b>1</b></a>','a/b'); true |
xpath_double | xpath_double(xml, xpath) - Returns a double value that matches the xpath expression | > SELECT xpath_double('<a><b>1</b><b>2</b></a>','sum(a/b)'); 3.0 |
xpath_float | xpath_float(xml, xpath) - Returns a float value that matches the xpath expression | > SELECT xpath_float('<a><b>1</b><b>2</b></a>','sum(a/b)'); 3.0 |
xpath_int | xpath_int(xml, xpath) - Returns an integer value that matches the xpath expression | > SELECT xpath_int('<a><b>1</b><b>2</b></a>','sum(a/b)'); 3 |
xpath_long | xpath_long(xml, xpath) - Returns a long value that matches the xpath expression | > SELECT xpath_long('<a><b>1</b><b>2</b></a>','sum(a/b)'); 3 |
xpath_number | xpath_number(xml, xpath) - Returns a double value that matches the xpath expression | > SELECT xpath_number('<a><b>1</b><b>2</b></a>','sum(a/b)'); 3.0 |
xpath_short | xpath_short(xml, xpath) - Returns a short value that matches the xpath expression | > SELECT xpath_short('<a><b>1</b><b>2</b></a>','sum(a/b)'); 3 |
xpath_string | xpath_string(xml, xpath) - Returns the text contents of the first xml node that matches the xpath expression | > SELECT xpath_string('<a><b>b</b><c>cc</c></a>','a/c'); cc |
year | year(param) - Returns the year component of the date/timestamp/interval. | > SELECT year('2016-07-30'); 2016 |
| | a | b - Bitwise OR. | > SELECT 3 | 5; 7 |
~ | ~ b - Bitwise NOT. | > SELECT ~ 0; -1 |