一、統計字母的頻率,並按照由大到小的頻率輸出
package org.yuan.HelloWorld; import java.io.FileInputStream; import java.io.IOException; import java.io.InputStreamReader; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.List; public class TestFile1 { public static void main(String[] args)throws IOException { List<Integer> list=new ArrayList<>(); DecimalFormat df=new DecimalFormat("######0.00"); FileInputStream fip = new FileInputStream("d:/Harry Potter and the Sorcerer's Stone.txt"); InputStreamReader reader = new InputStreamReader(fip, "gbk"); StringBuffer sb = new StringBuffer(); while (reader.ready()) { sb.append((char) reader.read()); } reader.close(); fip.close(); int i; String A=sb.toString(); String M="abcdefghijklmnopqrstuvwxyz"; char NUM[]=new char[A.length()]; char Z[]=new char[26]; int X[]=new int[26]; Z=M.toCharArray(); for(int k=0;k<26;k++) { X[k]=0; for(i=0;i<A.length();i++) { NUM[i]=A.charAt(i); if(Z[k]==NUM[i]||Z[k]==ch(NUM[i])) { X[k]++; } } } double sum=0; for(i=0;i<25;i++) for(int k=0;k<25-i;k++) { if(X[k]<X[k+1]) { int temp2=X[k]; X[k]=X[k+1]; X[k+1]=temp2; char temp3=Z[k]; Z[k]=Z[k+1]; Z[k+1]=temp3; } } for(i=0;i<26;i++) { sum=sum+X[i]; } System.out.println("一共有"+sum+"個字母。"); System.out.println("各字母頻率如下:"); for(i=0;i<26;i++) { double jkl=(X[i])/sum*100; System.out.println(Z[i]+":"+df.format(jkl)+"%"); } } static char ch(char c) { if(!(c>=97&&c<=122)) c+=32; return c; } }
二、統計單詞的數量,並輸出出現頻率最高的前N個單詞(N手動輸入)
package org.yuan.HelloWorld; import java.io.*; import java.util.*; import java.util.Map.Entry; public class TestFile2 { public static int n=0; public static void main(String[] args) { Scanner sc=new Scanner(System.in); String s; int count=0; int num=1; //作為FileReader和FileWriter讀取的對象 String file1="d:/Harry Potter and the Sorcerer's Stone.txt"; try { BufferedReader a=new BufferedReader(new FileReader(file1)); StringBuffer c=new StringBuffer(); //將文件內容存入StringBuffer中 while((s = a.readLine()) != null) { //用於拼接字符串 c.append(s); } //將StringBuffer轉換成String,然后再將所有字符轉化成小寫字符 String m=c.toString().toLowerCase(); //匹配由數字和26個字母組成的字符串 String [] d=m.split("[^a-zA-Z0-9]+"); //遍歷數組將其存入Map<String, Integer>中 Map<String , Integer> myTreeMap=new TreeMap<String, Integer>(); for(int i = 0; i < d.length; i++) { //containsKey()方法用於檢查特定鍵是否在TreeMap中映射 if(myTreeMap.containsKey(d[i])) { count = myTreeMap.get(d[i]); myTreeMap.put(d[i], count + 1); } else { myTreeMap.put(d[i], 1); } } //通過比較器實現排序 List<Map.Entry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(myTreeMap.entrySet()); //按降序排序 Collections.sort(list, new Comparator<Map.Entry<String, Integer>>() { public int compare(Entry<String, Integer> k1, Entry<String, Integer> k2) { //返回兩個單詞出現次數較多的那個單詞的出現次數 return k2.getValue().compareTo(k1.getValue()); } }); System.out.println("請輸入N:"); n=sc.nextInt(); for(Map.Entry<String, Integer> map : list) { if(num <= n) { //輸出到程序控制台 System.out.println(map.getKey() + ":" + map.getValue()); num++; } //輸出完畢退出 else break; } //關閉文件指針 a.close(); // b.close(); } catch(FileNotFoundException e) { System.out.println("找不到指定文件"); } catch(IOException e) { System.out.println("文件讀取錯誤"); } } }