Java代碼:
package utils;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Batch tool that scans a directory tree for files with a given suffix, guesses each
 * file's character encoding with jchardet, and re-encodes the files detected as GB2312
 * into UTF-8.
 *
 * <p>The converted text is written next to the original as {@code <fileName>tmp}; the
 * original file is deleted by {@link #main(String[])} only after a successful conversion.
 */
public class Gbk2utf8 {

    /**
     * Re-encodes a GBK text file as UTF-8, writing the result to {@code fileName + "tmp"}.
     *
     * <p>Every line of the input is written followed by {@code "\r\n"}. I/O failures are
     * reported with {@code printStackTrace()} and not propagated, so callers cannot tell
     * from this method whether the conversion succeeded.
     *
     * @param fileName path of the GBK-encoded source file
     */
    public static void gbk2Utf8(String fileName) {
        try {
            convertGbkToUtf8(fileName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Performs the GBK to UTF-8 conversion and propagates any I/O failure.
     *
     * @param fileName path of the GBK-encoded source file
     * @throws IOException if the source cannot be read or the target cannot be written
     */
    private static void convertGbkToUtf8(String fileName) throws IOException {
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(fileName), "GBK"));
            writer = new BufferedWriter(
                    new OutputStreamWriter(new FileOutputStream(fileName + "tmp"), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // Write each line exactly once; accumulating the lines in a buffer and
                // writing the whole buffer per iteration would duplicate earlier lines.
                writer.write(line);
                writer.write("\r\n");
            }
            // Flush inside the try so a failed write surfaces as an exception instead of
            // being hidden by the quiet close below.
            writer.flush();
        } finally {
            closeQuietly(reader);
            closeQuietly(writer);
        }
    }

    /** Closes a stream if it was opened, printing (not propagating) any close failure. */
    private static void closeQuietly(Closeable resource) {
        if (resource != null) {
            try {
                resource.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    /**
     * Recursively collects all files under a directory whose name ends with a suffix.
     *
     * @param strPath directory to search
     * @param suffix  file-name suffix to match
     * @return the matching files; empty if none are found or the directory cannot be listed
     */
    public static List<File> getFileList(String strPath, String suffix) {
        List<File> filelist = new ArrayList<File>();
        getFileList(strPath, suffix, filelist);
        return filelist;
    }

    /**
     * Recursively appends to {@code fileList} all files under a directory whose name ends
     * with a suffix.
     *
     * @param strPath  directory to search
     * @param suffix   file-name suffix to match
     * @param fileList list that receives the matching files
     */
    public static void getFileList(String strPath, String suffix, List<File> fileList) {
        File dir = new File(strPath);
        // All entries directly inside the directory; null when it cannot be listed.
        File[] files = dir.listFiles();
        if (files == null) {
            return;
        }
        for (File entry : files) {
            if (entry.isDirectory()) {
                // Descend into sub-directories.
                getFileList(entry.getAbsolutePath(), suffix, fileList);
            } else if (entry.getName().endsWith(suffix)) {
                fileList.add(entry);
            }
        }
    }

    /**
     * Guesses the character encoding of a file.
     *
     * <p>The detection result is held in a local variable rather than in static fields,
     * so a result from one file can never leak into the next call.
     *
     * @param file file to inspect
     * @return {@code "ASCII"} if every byte read was ASCII; otherwise the charset reported
     *         by the detector, or the first probable charset if none was reported; or
     *         {@code null} if the detector offers no candidate
     * @throws IOException if the file cannot be read
     */
    private static String guessFileCharset(File file) throws IOException {
        // Single-element holder so the anonymous observer can hand back its result.
        final String[] reported = new String[1];
        nsDetector det = new nsDetector();
        det.Init(new nsICharsetDetectionObserver() {
            public void Notify(String charset) {
                reported[0] = charset;
            }
        });

        boolean isAscii = true;
        boolean done = false;
        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));
        try {
            byte[] buf = new byte[1024];
            int len;
            while ((len = bis.read(buf, 0, buf.length)) != -1) {
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                // Deliberately not an "else": the chunk in which the first non-ASCII byte
                // appears must itself be fed to the detector, otherwise files that fit in
                // a single buffer are never analysed.
                if (!isAscii && !done) {
                    done = det.DoIt(buf, len, false);
                }
            }
        } finally {
            // Release the handle so the caller can delete the file afterwards.
            bis.close();
        }
        det.DataEnd();

        if (isAscii) {
            return "ASCII";
        }
        if (reported[0] != null) {
            return reported[0];
        }
        // Nothing was reported: fall back to the first probable charset, if any.
        String[] prob = det.getProbableCharsets();
        return prob.length > 0 ? prob[0] : null;
    }

    /**
     * Converts every GB2312 file found under a directory to UTF-8.
     *
     * @param args optional: {@code args[0]} is the directory to scan (default
     *             {@code F:\ppppppppp}), {@code args[1]} is the file suffix (default
     *             {@code .sql})
     */
    public static void main(String[] args) {
        String rootDir = args.length > 0 ? args[0] : "F:\\ppppppppp";
        String suffix = args.length > 1 ? args[1] : ".sql";

        List<File> files = getFileList(rootDir, suffix);
        for (int i = 0; i < files.size(); i++) {
            File file = files.get(i).getAbsoluteFile();
            String charset = null;
            try {
                System.out.print("正在獲取第" + i + "個文件的編碼" + "===" + file);
                charset = guessFileCharset(file);
            } catch (IOException e) {
                System.err.println("獲取文件編碼發生異常!");
            }
            System.out.println("=====" + file + "[" + charset + "]");
            if ("GB2312".equals(charset)) {
                System.out.println("正在獲取第" + i + "個文件的轉化" + "===" + file);
                try {
                    convertGbkToUtf8(files.get(i).getAbsolutePath());
                    // Remove the original only once the UTF-8 copy is fully written.
                    files.get(i).delete();
                } catch (IOException e) {
                    // Keep the original file: the converted copy may be incomplete.
                    e.printStackTrace();
                }
            }
        }
        System.out.println("文件總數:" + files.size());
    }
}
pom文件:
<!-- Maven dependency on jchardet; presumably the library that supplies the org.mozilla.intl.chardet classes (nsDetector, nsICharsetDetectionObserver) imported by the Java code. -->
<dependency> <groupId>net.sourceforge.jchardet</groupId> <artifactId>jchardet</artifactId> <version>1.0</version> </dependency>