[pig] 統計行數和單詞個數wordcount
阿新 • • 發佈:2019-01-28
原文連結:
http://blog.csdn.net/hijk139/article/details/8560131
-- Count how many times each distinct value of the first field occurs in the
-- input files, and print the (value, count) pairs whose count exceeds 200,
-- sorted by count in ascending order.
cd hdfs:///
-- Only the first field of each record is read, as a chararray named `name`.
-- NOTE(review): with the default loader this is the whole line only when the
-- line contains no field delimiter (tab) -- confirm against the log format.
A = LOAD '/logdata/2012*/*/nohup_*' AS (name:chararray) ;
-- One group per distinct value; the bag A inside each group holds its records.
B = GROUP A BY name;
-- $0 = the distinct value, $1 = number of records carrying that value.
C = FOREACH B GENERATE group, COUNT(A);
-- Filter BEFORE sorting: Pig only guarantees the sorted order when the ORDER
-- result is retrieved directly (DUMP/STORE); further processing such as FILTER
-- may lose it. Filtering first also means fewer rows have to be sorted.
D = FILTER C BY $1 > 200;
E = ORDER D BY $1;
dump E;
-- Word count: split every input line into words, count the occurrences of
-- each word, and print the (count, word) pairs whose count exceeds 200,
-- sorted by count in ascending order.
A = LOAD '/logdata/20130131/*/*' AS (line: chararray) ;
-- TOKENIZE turns a line into a bag of words; FLATTEN emits one record per word.
B = FOREACH A GENERATE FLATTEN(TOKENIZE((chararray)$0)) AS word;
C = GROUP B BY word;
-- $0 = number of occurrences, $1 = the word itself.
D = FOREACH C GENERATE COUNT(B), group;
-- Filter BEFORE sorting: Pig only guarantees the sorted order when the ORDER
-- result is retrieved directly (DUMP/STORE); further processing such as FILTER
-- may lose it. Filtering first also means fewer rows have to be sorted.
E = FILTER D BY $0 > 200;
F = ORDER E BY $0;
DUMP F;
參考資料:http://salsahpc.indiana.edu/ScienceCloud/pig_word_count_tutorial.htm