hive 自定義函式
阿新 • • 發佈:2019-01-02
資料格式
zhangsan | a,b,c,d,e,f,g
lisi | h,i,j,k,l,m,n
結構:
zhangsan a
zhangsan b
zhangsan c
zhangsan d
zhangsan e
zhangsan f
zhangsan g
lisi h
lisi i
lisi j
lisi k
lisi l
lisi m
lisi n
----------------------------------
package com.snda.hive.aaudf;

import java.util.ArrayList;
import java.util.regex.Pattern;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

/**
 * UDTF that explodes a delimited string into one row per token.
 *
 * <p>Takes two string arguments — the value to split and the separator —
 * and forwards one single-column row ({@code col1}) per token. Example:
 * {@code explode('a,b,c', ',')} yields three rows: a, b, c.
 */
public class ExplodeMap extends GenericUDTF {

    @Override
    public void close() throws HiveException {
        // No per-query resources to release.
    }

    /**
     * Validates the argument list and declares the single string output column.
     *
     * @param args exactly two primitive (string) arguments: value, separator
     * @return a struct inspector with one string field named "col1"
     * @throws UDFArgumentException if the argument count or types are wrong
     */
    @Override
    public StructObjectInspector initialize(ObjectInspector[] args) throws UDFArgumentException {
        if (args.length != 2) {
            // FIX: message grammar ("takes only two argument" in the original).
            throw new UDFArgumentLengthException("ExplodeMap takes exactly two arguments");
        }
        // FIX: the original only checked args[0]; the separator must be primitive too.
        if (args[0].getCategory() != ObjectInspector.Category.PRIMITIVE
                || args[1].getCategory() != ObjectInspector.Category.PRIMITIVE) {
            throw new UDFArgumentException("ExplodeMap takes string as a parameter");
        }
        ArrayList<String> fieldNames = new ArrayList<String>();
        ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
        fieldNames.add("col1");
        fieldOIs.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
    }

    /**
     * Splits args[0] on the literal separator args[1] and forwards one row per token.
     *
     * @param args {value, separator}
     */
    @Override
    public void process(Object[] args) throws HiveException {
        String input = args[0].toString();
        String separator = args[1].toString();
        // FIX: quote the separator so it is treated literally. The original passed it
        // straight to String.split, where regex metacharacters such as '|' or '.'
        // would split on every character instead of the intended delimiter.
        for (String token : input.split(Pattern.quote(separator))) {
            try {
                // FIX: the original did line.split("XXXX") — a no-op split that always
                // yields {line} — before forwarding. Forward the token directly.
                forward(new Object[] { token });
            } catch (Exception ignored) {
                // Best-effort, matching the original: a token that fails to forward
                // is skipped rather than failing the whole query.
            }
        }
    }
}
-- Target table for the raw "name|csv-list" sample lines shown above.
create table testtable(
name string,
address string
)
-- NOTE(review): the sample rows show spaces around the '|' delimiter; if the
-- real file has them, 'name' and 'address' will carry stray spaces — confirm.
row format delimited fields terminated by '|' lines terminated by '\n' stored as textfile;
-- Replace any existing table contents with the local sample file.
load data local inpath '/home/hadoop/data/test.txt' overwrite into table testtable;
-- Register the UDTF jar from HDFS and bind it to a temporary function.
-- NOTE(review): the name 'explode' shadows Hive's built-in explode() for this
-- session; a distinct name (e.g. explode_map) would be safer — confirm intent.
add jar hdfs://192.168.1.30:9000/ExplodeMap.jar;
create temporary function explode as 'com.snda.hive.aaudf.ExplodeMap';
-- LATERAL VIEW emits one output row per comma-separated token of 'address',
-- exposed as column 'adid' of the virtual table 'adTable'.
select name,adid from testtable LATERAL VIEW explode(address,',') adTable as adid;