1. 程式人生 > >統計單詞頻率

統計單詞頻率

nal turn print har map.entry ted cat tree keys

今天老師又安排了一個任務:統計一個文件中出現最多的幾個單詞出現的頻率。

怎麽說呢,還是一點不會,只能上網搜,通過兩小時的奮鬥,我還是沒能做出來,但是我知道了如何從讀取文件中的信息的代碼以及如何統計單詞頻率的代碼;然而,我無法把他們合起來,也無法理解許多代碼的作用,就只知道其功能,所以我想,我還是該再去借一本書隨身背著,然後就是多花時間學習java了。

今晚又經過三個多小時,又搜了搜想了想,還把一些相似的代碼進行對照,然而我還是無法改動代碼分毫,我覺得我很無能,感覺三個多小時是白搭了,一事無成,也開始慌了,真的真的該下功夫學習java了。

以下是我的未實現功能的代碼:

import java.io.BufferedReader;
import java.io.File; import java.io.FileReader; import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.Map; import java.util.TreeMap; public class test { public static void findEnglishNum(String text){ //找出所有的單詞 String[] array = {".", " ", "?", "!"};
for (int i = 0; i < array.length; i++) { text = text.replace(array[i],","); } String[] textArray = text.split(","); //遍歷 記錄 Map<String, Integer> map = new HashMap<String, Integer>(); for (int i = 0; i < textArray.length; i++) { String key
= textArray[i]; //轉為小寫 String key_l = key.toLowerCase(); if(!"".equals(key_l)){ Integer num = map.get(key_l); if(num == null || num == 0){ map.put(key_l, 1); }else if(num > 0){ map.put(key_l, num+1); } } } //輸出到控制臺 System.out.println("各個單詞出現的頻率為:"); Iterator<String> iter = map.keySet().iterator(); while(iter.hasNext()){ String key = iter.next(); Integer num = map.get(key); System.out.println(key + "\n\t\t" + num + "次\n-------------------"); } } public static void main(String[] args) { fread("G:\\yingyu.txt"); findEnglishNum() } // 讀取文件: public static void fread(String fileurl) { File file = new File(fileurl); BufferedReader bfr = null; try { bfr = new BufferedReader(new FileReader(file)); String tem = null; String value = ""; while ((tem = bfr.readLine()) != null) { value = value + tem; } System.out.println(value); // 將讀取的字符串轉換成字符數組: char[] c = value.toCharArray(); // 定義一個map來存儲結果: // HashMap<Character,Integer> tm = new // HashMap<Character,Integer>(Collections.reverseOrder()); TreeMap<Character, Integer> tm = new TreeMap<Character, Integer>(Collections.reverseOrder());// TreeMap可排序(傳入一個反轉比較器) for (int i = 0; i < c.length; i++) { char charSrc = c[i]; if (tm.containsKey(charSrc)) { // 判斷該鍵的值是否存在 int count = tm.get(charSrc); tm.put(charSrc, count + 1); } else { tm.put(charSrc, 1); } } // 取出Map中的鍵和值 Iterator<Map.Entry<Character, Integer>> titer = tm.entrySet().iterator(); while (titer.hasNext()) { Map.Entry<Character, Integer> map = titer.next(); char key = map.getKey(); int valu = map.getValue(); System.out.println(key + "出現過" + valu + "次!"); } } catch (Exception e) { System.err.println("文件讀取錯誤"); } finally { try { if (bfr != null) { bfr.close(); } } catch (Exception e2) { System.err.println("文件關閉錯誤"); } } } }
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Scanner;

public class tongji {
     public static String txt2String(File file){
            StringBuilder result = new StringBuilder();
            try{
                BufferedReader br = new BufferedReader(new FileReader(file));//構造一個BufferedReader類來讀取文件
                String s = null;
                while((s = br.readLine())!=null){//使用readLine方法,一次讀一行
                    result.append(System.lineSeparator()+s);
                }
                br.close();    
            }catch(Exception e){
                e.printStackTrace();
            }
            return result.toString();
        }
     //讀取文件信息的方法。
    
     
     
     
     
     
    
     public static void findEnglishNum(String text){  
         //找出所有的單詞  
         String[] array = {".", " ", "?", "!"};  
         for (int i = 0; i < array.length; i++) {  
             text = text.replace(array[i],",");  
         }  
         String[] textArray = text.split(",");  
           
         //遍歷 記錄  
         Map<String, Integer> map = new HashMap<String, Integer>();  
         for (int i = 0; i < textArray.length; i++) {  
             String key = textArray[i];  
             //轉為小寫  
             String key_l = key.toLowerCase();  
             if(!"".equals(key_l)){  
                 Integer num = map.get(key_l);  
                 if(num == null || num == 0){  
                     map.put(key_l, 1);  
                 }else if(num > 0){  
                     map.put(key_l, num+1);  
                 }  
             }  
         }  
         //輸出到控制臺  
         System.out.println("各個單詞出現的頻率為:");  
         Iterator<String> iter = map.keySet().iterator();  
         while(iter.hasNext()){  
             String key = iter.next();  
             Integer num = map.get(key);  
             System.out.println(key + "\n\t\t" + num + "次\n-------------------");  
         }  
     }
     
     

     
     public static void main(String[] arg) {
        File file = new File("G:/yingyu.txt");
        System.out.println(txt2String(file));
        
        
        
    
        
        String abc = "Welcome welcome to ADempiere, a commons-based peer-production of Open Source ERP Applications. This Wiki is for the global community to contribute and share know-how and domain expertise. We hope you can find as much open information and participate in making it most usable for everyone. This project has a bazaar of Citizens with a Community Council Team which work in theFunctional Team and Technical Team along the Software Development Procedure supported and funded by the foundation ADempiere";  
       
        findEnglishNum(abc);
        
        
    }
}

統計單詞頻率