1. 程式人生 > 實用技巧 >C#(五)基礎篇—字串

C#(五)基礎篇—字串

一、hive用本地檔案進行詞頻統計

1.準備本地txt檔案

echo "hadoop hbase" > f1.txt
echo "hadoop hive" > f2.txt

 

2.啟動hadoop,啟動hive

3.建立資料庫,建立文字表

create database if not exists hive; -- 建立資料庫
use hive; -- 選擇資料庫
create table if not exists wctext(line string); 
show tables;

4.對映本地檔案的資料到文字表中

load data local inpath '/home/hadoop/wc/f1.txt' into table wctext;
load data local inpath '/home/hadoop/wc/f2.txt' into table wctext;

5.hql語句進行詞頻統計並將結果儲存到結果表中。

select word,count(1) as count from (select explode(split(line,' ')) as word from wctext) w group by word order by word;
create table wc as select word,count(1) as count from (select explode(split(line,' ')) as word from wctext) w group by word order by word;

6.檢視統計結果

select * from wc;

二、hive用HDFS上的檔案進行詞頻統計

1.準備電子書或其它大的文字檔案

2.將文字檔案上傳到HDFS上

hdfs dfs -put story.txt  input/wcHive/

3.建立文字表

create table docs(line string);

4.對映HDFS中的檔案資料到文字表中

create table if not exists docs(line string);

load data inpath '/user/hadoop/input/wcHive/story.txt' overwrite into table docs;

5.hql語句進行詞頻統計並將結果儲存到結果表中

create table word_count as select word,count(1) as count from (select explode(split(line,' ')) as word from docs) word group by word order by word;

6.檢視統計結果

show tables;
select * from word_count;