分詞統計(一)使用訊飛語言云進行統計
阿新 • • 發佈:2019-01-26
最近想取一個網名,想起中國文化博大精深,如果用古代的唐詩宋詞組合,一定會有一個意想不到的名字。組合首先要分詞,想起錘子手機有一個很火的功能叫BigBang,它用的是訊飛的語言云,是免費提供的,所以這次使用訊飛的語言云進行分詞,然後隨機組合。另外,還可以進行有趣的資料統計,看看古代的高頻詩詞是哪些?
大概的步驟如下:
1、獲取訊飛語言云ApiKey
2、搭建網路請求
3、選擇詩詞的txt檔案
4、顯示分詞結果
5、儲存結果到sqlite資料庫
6、進行分詞隨機組合
7、利用分詞進行資料統計
使用到的技術:
ButterKnife
Retrofit
1 獲取訊飛語言云ApiKey
首先去訊飛開發者平臺申請語言云的ApiKey
2 搭建網路框架
在工程根目錄的build.gradle檔案(buildscript區塊內)新增
// Build-script classpath entries: the Android Gradle plugin and the android-apt plugin.
// android-apt is the plugin applied as 'com.neenbedankt.android-apt' in the module build
// file, where the `apt` configuration is used for the ButterKnife compiler.
// NOTE(review): presumably this block sits inside `buildscript { ... }` of the root
// build.gradle — confirm against the real project.
dependencies {
classpath 'com.android.tools.build:gradle:2.3.0'
classpath 'com.neenbedankt.gradle.plugins:android-apt:1.8'
}
在app模組的build.gradle檔案新增
// Module-level build file for the sample app.
// One statement per line: the collapsed single-line form does not separate the statements.
apply plugin: 'com.android.application'
// Provides the `apt` configuration used for the ButterKnife annotation processor below.
apply plugin: 'com.neenbedankt.android-apt'

android {
    compileSdkVersion 25
    buildToolsVersion "25.0.0"

    defaultConfig {
        applicationId "com.mwf.analyze"
        minSdkVersion 15
        targetSdkVersion 19
        versionCode 1
        versionName "1.0"
        testInstrumentationRunner "android.support.test.runner.AndroidJUnitRunner"
    }

    buildTypes {
        release {
            minifyEnabled false
            proguardFiles getDefaultProguardFile('proguard-android.txt'), 'proguard-rules.pro'
        }
    }
}

dependencies {
    compile fileTree(dir: 'libs', include: ['*.jar'])
    testCompile 'junit:junit:4.12'

    // Support libraries (appcompat-v7 was listed twice; one declaration is enough).
    compile 'com.android.support:appcompat-v7:25.0.0'
    compile 'com.android.support:design:25.0.0'

    // Networking: OkHttp + Retrofit with its converters/adapters and request logging.
    compile 'com.squareup.okhttp3:okhttp:3.3.1'
    compile 'com.squareup.okhttp3:logging-interceptor:3.3.1'
    compile 'com.squareup.retrofit2:retrofit:2.1.0'
    compile 'com.squareup.retrofit2:converter-gson:2.1.0'
    compile 'com.squareup.retrofit2:adapter-rxjava:2.1.0'
    compile 'io.reactivex:rxandroid:1.1.0'
    compile 'io.reactivex:rxjava:1.1.0'

    // View binding.
    compile 'com.jakewharton:butterknife:8.2.1'
    apt 'com.jakewharton:butterknife-compiler:8.2.1'

    // File chooser dialog.
    compile 'com.github.angads25:filepicker:1.0.9'
}
新增一個常量檔案Constant.java
package com.mwf.analyze;
/**
 * Application-wide constants.
 *
 * <p>The fields are {@code final} so that no code can reassign the server address or the
 * API key at runtime.
 */
public class Constant {
    /**
     * Base URL of the server; used as the Retrofit base URL.
     */
    public static final String BASEURL = "http://ltpapi.voicecloud.cn/";

    /**
     * ApiKey for the iFlytek language cloud. Replace the placeholder with the key you
     * applied for before building.
     */
    public static final String APIKEY = "寫上你申請的ApiKey";
}
新增一個網路請求的主體檔案RetrofitWrapper.java
package com.mwf.analyze; import android.content.Context; import android.util.Log; import com.google.gson.Gson; import com.google.gson.GsonBuilder; import com.mwf.analyze.utils.ToStringConverterFactory; import okhttp3.OkHttpClient; import okhttp3.logging.HttpLoggingInterceptor; import retrofit2.Retrofit; /** * 網路請求主體 */ public class RetrofitWrapper { private static RetrofitWrapper instance; private Context mContext; private Retrofit mRetrofit; public RetrofitWrapper(String url) { OkHttpClient.Builder builder = new OkHttpClient.Builder(); HttpLoggingInterceptor logging = new HttpLoggingInterceptor(new HttpLoggingInterceptor.Logger() { @Override public void log(String message) { // 列印Log Log.i("OkHttp", message); } }); logging.setLevel(HttpLoggingInterceptor.Level.BODY); OkHttpClient client = builder.addInterceptor(logging) .build(); Gson gson = new GsonBuilder() .setLenient() .create(); mRetrofit = new Retrofit.Builder().baseUrl(url) // .addConverterFactory(GsonConverterFactory.create(gson)) .addConverterFactory(new ToStringConverterFactory()) // .addCallAdapterFactory(RxJavaCallAdapterFactory.create()) .client(client) .build(); } public static RetrofitWrapper getInstance(String url){ if(null == instance){ synchronized (RetrofitWrapper.class){ instance = new RetrofitWrapper(url); } } return instance; } public <T> T create(final Class<T> service) { return mRetrofit.create(service); } }
因為請求的結果是String字串,所以addConverterFactory使用自定義的工具類ToStringConverterFactory.java
package com.mwf.analyze.utils;
import java.io.IOException;
import java.lang.annotation.Annotation;
import java.lang.reflect.Type;
import okhttp3.MediaType;
import okhttp3.RequestBody;
import okhttp3.ResponseBody;
import retrofit2.Converter;
import retrofit2.Retrofit;
/**
 * Converter factory that hands request and response bodies through as plain
 * {@code String}s. For any type other than {@code String} both factory methods return
 * {@code null}.
 */
public class ToStringConverterFactory extends Converter.Factory {
    private static final MediaType MEDIA_TYPE = MediaType.parse("text/plain");

    @Override
    public Converter<ResponseBody, ?> responseBodyConverter(Type type, Annotation[] annotations, Retrofit retrofit) {
        if (!String.class.equals(type)) {
            return null;
        }
        return new ResponseToString();
    }

    @Override
    public Converter<?, RequestBody> requestBodyConverter(Type type, Annotation[] parameterAnnotations,
            Annotation[] methodAnnotations, Retrofit retrofit) {
        if (!String.class.equals(type)) {
            return null;
        }
        return new StringToRequest();
    }

    /** Reads the whole response body as a string. */
    private static final class ResponseToString implements Converter<ResponseBody, String> {
        @Override
        public String convert(ResponseBody body) throws IOException {
            return body.string();
        }
    }

    /** Wraps a string in a request body tagged with {@link #MEDIA_TYPE}. */
    private static final class StringToRequest implements Converter<String, RequestBody> {
        @Override
        public RequestBody convert(String text) throws IOException {
            return RequestBody.create(MEDIA_TYPE, text);
        }
    }
}
網路請求單例:
package com.mwf.analyze.model;
import android.content.Context;
import com.mwf.analyze.Constant;
import com.mwf.analyze.RetrofitWrapper;
import com.mwf.analyze.bean.FamousInfoReq;
import com.mwf.analyze.intf.IFamousInfo;
import retrofit2.Call;
/**
 * Lazily created singleton that issues the analysis request through {@link IFamousInfo}.
 *
 * <p>{@link #getInstance(Context)} is not synchronized. The {@code Context} arguments are
 * accepted but not stored or otherwise used by this class.
 */
public class FamousInfoModel {
    private static FamousInfoModel sInstance;
    private IFamousInfo mApi;

    /**
     * Creates the Retrofit-backed service using {@link Constant#BASEURL}.
     *
     * @param context unused
     */
    public FamousInfoModel(Context context) {
        RetrofitWrapper wrapper = RetrofitWrapper.getInstance(Constant.BASEURL);
        mApi = wrapper.create(IFamousInfo.class);
    }

    /**
     * Returns the shared model, creating it on the first call.
     *
     * @param context forwarded to the constructor when the instance is created
     * @return the shared instance
     */
    public static FamousInfoModel getInstance(Context context) {
        if (sInstance != null) {
            return sInstance;
        }
        sInstance = new FamousInfoModel(context);
        return sInstance;
    }

    /**
     * Builds the call for one analysis request.
     *
     * @param famousInfoReq request parameters; its public fields are read directly
     * @return the Retrofit call, not yet executed
     */
    public Call<String> queryLookUp(FamousInfoReq famousInfoReq) {
        return mApi.getFamousResult(
                famousInfoReq.api_key,
                famousInfoReq.text,
                famousInfoReq.pattern,
                famousInfoReq.format);
    }
}
請求引數介面
package com.mwf.analyze.intf;
import retrofit2.Call;
import retrofit2.http.GET;
import retrofit2.http.Query;
/**
 * Retrofit service interface describing the request parameters of the analysis call.
 */
public interface IFamousInfo {
/**
 * Issues a GET request to {@code /analysis}, sending every argument as a query parameter.
 *
 * @param api_key sent as query parameter {@code api_key}
 * @param text    sent as query parameter {@code text}
 * @param pattern sent as query parameter {@code pattern}
 * @param format  sent as query parameter {@code format}
 * @return a call whose response body is delivered as a {@code String}
 */
@GET("/analysis")
Call<String> getFamousResult(@Query("api_key") String api_key,
@Query("text") String text,
@Query("pattern") String pattern,
@Query("format") String format);
}
請求的引數實體
package com.mwf.analyze.bean;
/**
 * Parameter holder for one analysis request. The fields are public and are also exposed
 * through conventional getters and setters.
 */
public class FamousInfoReq {
    /** Authentication key obtained after registering for the voice cloud service. */
    public String api_key;

    /** The text to analyse. */
    public String text;

    /**
     * Analysis mode. Allowed values: ws (word segmentation), pos (part-of-speech tagging),
     * ner (named-entity recognition), dp (dependency parsing), srl (semantic role
     * labelling), all (every task).
     */
    public String pattern;

    /**
     * Result format. Allowed values: xml (XML), json (JSON), conll (CONLL),
     * plain (compact text).
     */
    public String format;

    // ---- getters ----

    public String getApi_key() {
        return this.api_key;
    }

    public String getText() {
        return this.text;
    }

    public String getPattern() {
        return this.pattern;
    }

    public String getFormat() {
        return this.format;
    }

    // ---- setters ----

    public void setApi_key(String api_key) {
        this.api_key = api_key;
    }

    public void setText(String text) {
        this.text = text;
    }

    public void setPattern(String pattern) {
        this.pattern = pattern;
    }

    public void setFormat(String format) {
        this.format = format;
    }
}
主介面的佈局
<?xml version="1.0" encoding="utf-8"?>
<RelativeLayout xmlns:android="http://schemas.android.com/apk/res/android"
android:layout_width="match_parent"
android:layout_height="match_parent"
>
<!-- Main screen: a text input on top, two action buttons stacked below it, and a
     scrollable result area that fills the remaining space. -->
<!-- Input area holding the multi-line text field (edit_keyword).
     NOTE(review): android:orientation and android:layout_weight are LinearLayout
     attributes; presumably they have no effect inside a RelativeLayout - confirm. -->
<RelativeLayout
android:id="@+id/search_layout"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:layout_margin="10dp"
android:gravity="center_vertical"
android:orientation="horizontal">
<EditText
android:id="@+id/edit_keyword"
android:layout_width="match_parent"
android:layout_height="120dp"
android:layout_weight="1"
android:hint="請輸入文字"
android:paddingLeft="10dp"
/>
</RelativeLayout>
<!-- Button bound in MainActivity to analysing the text currently in edit_keyword. -->
<Button
android:id="@+id/button_search"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_below="@id/search_layout"
android:layout_centerHorizontal="true"
android:layout_centerVertical="true"
android:layout_marginLeft="5dp"
android:layout_marginRight="15dp"
android:text="輸入框解析"/>
<!-- Button bound in MainActivity to picking a file and analysing its content. -->
<Button
android:id="@+id/button_file"
android:layout_width="wrap_content"
android:layout_height="wrap_content"
android:layout_below="@id/button_search"
android:layout_centerHorizontal="true"
android:layout_centerVertical="true"
android:layout_marginLeft="5dp"
android:layout_marginRight="15dp"
android:text="檔案解析"/>
<!-- Scrollable result area placed below the file button. -->
<ScrollView android:layout_width="match_parent"
android:layout_height="match_parent"
android:layout_below="@id/button_file"
android:layout_margin="10dp">
<LinearLayout
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:orientation="vertical">
<!-- Static caption for the result.
     NOTE(review): textSize is given in dp; sp is the usual unit for text - confirm
     whether ignoring the user's font-scale setting is intended. -->
<TextView
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:text="結果:"
android:textSize="20dp"/>
<!-- Receives the segmentation result text set by MainActivity (txt_content). -->
<TextView
android:id="@+id/txt_content"
android:layout_width="match_parent"
android:layout_height="wrap_content"
android:textColor="#000000"/>
</LinearLayout>
</ScrollView>
</RelativeLayout>
主介面的程式碼
package com.mwf.analyze.activity;
import android.os.Bundle;
import android.support.v7.app.AppCompatActivity;
import android.text.TextUtils;
import android.util.Log;
import android.view.View;
import android.widget.Button;
import android.widget.EditText;
import android.widget.TextView;
import com.github.angads25.filepicker.controller.DialogSelectionListener;
import com.github.angads25.filepicker.model.DialogConfigs;
import com.github.angads25.filepicker.model.DialogProperties;
import com.github.angads25.filepicker.view.FilePickerDialog;
import com.mwf.analyze.Constant;
import com.mwf.analyze.R;
import com.mwf.analyze.bean.CloudResultPlainParse;
import com.mwf.analyze.bean.FamousInfoReq;
import com.mwf.analyze.model.FamousInfoModel;
import com.mwf.analyze.utils.FileUtils;
import java.io.File;
import java.util.ArrayList;
import butterknife.BindView;
import butterknife.ButterKnife;
import butterknife.OnClick;
import retrofit2.Call;
import retrofit2.Callback;
import retrofit2.Response;
/**
 * Main screen: sends either the typed text or the content of a picked file to the
 * analysis service and shows the parsed result, one entry per line.
 */
public class MainActivity extends AppCompatActivity implements View.OnClickListener {
    public final String TAG = this.getClass().getName();
    private FamousInfoModel famousInfoModel;

    @BindView(R.id.edit_keyword)
    EditText mEditKeyWord;
    @BindView(R.id.button_search)
    Button mSerachBtn;
    @BindView(R.id.button_file)
    Button button_file;
    @BindView(R.id.txt_content)
    TextView mTxtContent;

    @Override
    protected void onCreate(Bundle savedInstanceState) {
        super.onCreate(savedInstanceState);
        setContentView(R.layout.activity_main);
        ButterKnife.bind(this);
        famousInfoModel = FamousInfoModel.getInstance(this);
        // getSupportActionBar() returns null when the theme has no action bar.
        if (getSupportActionBar() != null) {
            getSupportActionBar().hide();
        }
    }

    /**
     * Dispatches clicks of the two buttons to the matching action.
     *
     * @param view the clicked view
     */
    @Override
    @OnClick({R.id.button_search, R.id.button_file})
    public void onClick(View view) {
        if (view.getId() == R.id.button_search) {
            parseEditext();
        } else if (view.getId() == R.id.button_file) {
            parseFile();
        }
    }

    /**
     * Builds the request parameters: word segmentation ("ws") with plain-text output.
     *
     * @param text the text to analyse
     * @return the populated request
     */
    private FamousInfoReq initParams(String text) {
        FamousInfoReq request = new FamousInfoReq();
        request.api_key = Constant.APIKEY;
        request.text = text;
        request.pattern = "ws";
        request.format = "plain";
        return request;
    }

    /**
     * Analyses the current content of the input box.
     */
    private void parseEditext() {
        requestSegmentation(mEditKeyWord.getText().toString());
    }

    /**
     * Lets the user pick a single file, shows its text in the input box and analyses it.
     */
    private void parseFile() {
        DialogProperties properties = new DialogProperties();
        properties.selection_mode = DialogConfigs.SINGLE_MODE;
        properties.selection_type = DialogConfigs.FILE_SELECT;
        properties.root = new File(DialogConfigs.DEFAULT_DIR);
        properties.error_dir = new File(DialogConfigs.DEFAULT_DIR);
        properties.offset = new File(DialogConfigs.DEFAULT_DIR);
        properties.extensions = null;
        FilePickerDialog dialog = new FilePickerDialog(MainActivity.this, properties);
        dialog.setTitle("Select a File");
        dialog.setDialogSelectionListener(new DialogSelectionListener() {
            @Override
            public void onSelectedFilePaths(String[] files) {
                // Guard against an empty selection before indexing.
                if (files == null || files.length == 0) {
                    return;
                }
                String string = FileUtils.readTxtFile(files[0]);
                if (!TextUtils.isEmpty(string)) {
                    Log.e(TAG, string);
                    mEditKeyWord.setText(string);
                    parseF(string);
                }
            }
        });
        dialog.show();
    }

    /**
     * Network request for text that was read from a file.
     *
     * @param text the file content to analyse
     */
    private void parseF(String text) {
        requestSegmentation(text);
    }

    /**
     * Sends the text to the service and renders the parsed result into the result view.
     * Shared by the input-box and file flows, which previously duplicated this callback.
     *
     * @param text the text to analyse
     */
    private void requestSegmentation(String text) {
        // NOTE(review): the text travels as a GET query parameter (see IFamousInfo), so very
        // large files may exceed URL length limits - confirm against the service.
        famousInfoModel.queryLookUp(initParams(text)).enqueue(new Callback<String>() {
            @Override
            public void onResponse(Call<String> call, Response<String> response) {
                String body = response.body();
                // body() is null for non-2xx responses; avoid the NullPointerException.
                if (!response.isSuccessful() || body == null) {
                    Log.e(TAG, "Analysis request returned HTTP " + response.code());
                    return;
                }
                CloudResultPlainParse parse = new CloudResultPlainParse();
                ArrayList<String> list = parse.parse(body.trim());
                StringBuilder display = new StringBuilder();
                for (String tmp : list) {
                    display.append(tmp).append("\n");
                    Log.e(TAG, tmp);
                }
                mTxtContent.setText(display.toString());
            }

            @Override
            public void onFailure(Call<String> call, Throwable t) {
                // Previously swallowed silently; at least make the failure visible in logcat.
                Log.e(TAG, "Analysis request failed", t);
            }
        });
    }
}
檔案操作工具類
package com.mwf.analyze.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
/**
 * File helper utilities.
 */
public class FileUtils {
    /**
     * Reads a UTF-8 text file and returns its lines concatenated into one string.
     *
     * <p>Line terminators are not preserved: the lines are joined without any separator.
     * Every line is also echoed to {@code System.out} while reading.
     *
     * @param filePath path of the file to read
     * @return the concatenated content ({@code ""} for an empty file), or {@code null} when
     *         the path is {@code null}, is not an existing regular file, or reading fails
     */
    public static String readTxtFile(String filePath) {
        FileInputStream input = null;
        BufferedReader reader = null;
        try {
            // String encoding="GBK";
            String encoding = "UTF-8";
            File file = new File(filePath);
            // isFile() already implies that the file exists.
            if (!file.isFile()) {
                System.out.println("找不到指定的檔案");
                return null;
            }
            input = new FileInputStream(file);
            reader = new BufferedReader(new InputStreamReader(input, encoding));
            StringBuilder result = new StringBuilder();
            String lineTxt;
            while ((lineTxt = reader.readLine()) != null) {
                System.out.println(lineTxt);
                result.append(lineTxt);
            }
            return result.toString();
        } catch (Exception e) {
            // Also covers a null filePath (NullPointerException from new File(null)).
            System.out.println("讀取檔案內容出錯");
            e.printStackTrace();
            return null;
        } finally {
            // Explicit finally instead of try-with-resources: the project targets minSdk 15,
            // and closing here guarantees the stream is released even when reading throws.
            try {
                if (reader != null) {
                    reader.close();
                } else if (input != null) {
                    input.close();
                }
            } catch (Exception ignored) {
                // The result is already decided; a failing close cannot be handled usefully.
            }
        }
    }
}
專案地址:玩轉資料統計