統計一篇文章中各英語單詞出現的頻數
阿新 • • 發佈:2018-11-26
package com.icinfo;

import java.io.*;
import java.nio.charset.StandardCharsets;
import java.util.*;
import java.util.regex.Pattern;

/**
 * Counts how often each word occurs in a text file and prints the result.
 *
 * <p>A "word" is a maximal run of regex word characters ({@code \w}); every
 * other character acts as a separator. Words are compared case-insensitively
 * by lower-casing them before counting.
 */
public class FileWordCount {

    /** Path read by {@link #main(String[])} when no command-line argument is given. */
    private static final String DEFAULT_FILE = "C:/Users/hzhb/Desktop/test.txt";

    /** Separator between words: one or more non-word characters. Compiled once and reused. */
    private static final Pattern NON_WORD = Pattern.compile("\\W+");

    // Word -> number of occurrences. This is instance state, so counts
    // accumulate across repeated wordCount(...) calls on the same object.
    Map<String, Integer> wordCount = new HashMap<>();

    /**
     * Counts the words of a file and prints the {@code word=count} entries in
     * ascending order of count.
     *
     * @param args optional; {@code args[0]} is the file to read, otherwise the
     *             built-in default path is used
     */
    public static void main(String[] args) {
        String fileName = args.length > 0 ? args[0] : DEFAULT_FILE;
        Map<String, Integer> counts = new FileWordCount().wordCount(fileName);

        // Copy the entries into a list so they can be sorted by frequency.
        List<Map.Entry<String, Integer>> entries = new ArrayList<>(counts.entrySet());
        entries.sort(Map.Entry.comparingByValue());
        entries.forEach(System.out::println);
    }

    /**
     * Reads the given file line by line (as UTF-8) and adds the frequency of
     * every word it contains to this instance's counts.
     *
     * <p>If the file cannot be opened, a message is printed and the counts are
     * returned unchanged. If reading fails part-way, the stack trace is printed
     * and the counts gathered so far are returned.
     *
     * @param fileName path of the text file to analyse
     * @return the map from lower-cased word to occurrence count; this is the
     *         instance's own map, not a copy
     */
    public Map<String, Integer> wordCount(String fileName) {
        // try-with-resources guarantees the underlying stream is closed.
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(new FileInputStream(fileName), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                for (String token : NON_WORD.split(line)) {
                    // Leading separators and blank lines yield an empty token; skip it.
                    if (token.isEmpty()) {
                        continue;
                    }
                    // Locale.ROOT keeps lower-casing independent of the default locale.
                    wordCount.merge(token.toLowerCase(Locale.ROOT), 1, Integer::sum);
                }
            }
        } catch (FileNotFoundException e) {
            // Must precede IOException: nothing was opened, so report and fall through.
            System.out.println("檔案不存在!");
        } catch (IOException e) {
            // Best effort: keep whatever was counted before the read failure.
            e.printStackTrace();
        }
        // Never report the empty string as a word, even if a caller put it in the map.
        wordCount.remove("");
        return wordCount;
    }
}