java 讀寫文件,轉碼


Java代碼:

package utils;

import org.mozilla.intl.chardet.nsDetector;
import org.mozilla.intl.chardet.nsICharsetDetectionObserver;

import java.io.*;
import java.util.ArrayList;
import java.util.List;

/**
 * Command-line helper that walks a directory tree, guesses the charset of every
 * file with a given suffix (using the jchardet {@code nsDetector}), and rewrites
 * files detected as GB2312 into a UTF-8 copy named {@code <original>tmp}.
 */
public class Gbk2utf8 {

    /** Directory scanned by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_ROOT = "F:\\ppppppppp";

    /** File-name suffix selected by {@link #main(String[])} when no second argument is supplied. */
    private static final String DEFAULT_SUFFIX = ".sql";

    /**
     * Re-encodes a GBK text file as UTF-8.
     *
     * <p>The result is written to {@code fileName + "tmp"}; the input file itself is
     * left untouched. Every line is terminated with {@code "\r\n"} in the output.
     * I/O failures are printed to standard error and otherwise ignored, so callers
     * that must know whether the conversion succeeded cannot rely on this method.
     *
     * @param fileName path of the GBK-encoded file to read
     */
    public static void gbk2Utf8(String fileName) {
        try {
            convertGbkToUtf8(fileName);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Does the actual GBK to UTF-8 copy and reports failures to the caller.
     *
     * @param fileName path of the GBK-encoded file to read
     * @throws IOException if the input cannot be read or the output cannot be written
     */
    private static void convertGbkToUtf8(String fileName) throws IOException {
        BufferedReader reader = null;
        BufferedWriter writer = null;
        try {
            reader = new BufferedReader(new InputStreamReader(new FileInputStream(fileName), "GBK"));
            writer = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(fileName + "tmp"), "UTF-8"));
            String line;
            while ((line = reader.readLine()) != null) {
                // Write each line exactly once. Accumulating the lines and writing the
                // whole accumulation per iteration would duplicate earlier lines.
                writer.write(line);
                writer.write("\r\n");
            }
            // Flush here so that a failing write surfaces as an exception instead of
            // being hidden inside the best-effort close below.
            writer.flush();
        } finally {
            closeAndReport(reader);
            closeAndReport(writer);
        }
    }

    /**
     * Closes a resource, printing (not propagating) any failure.
     *
     * @param resource the resource to close; {@code null} is ignored
     */
    private static void closeAndReport(Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Recursively collects all files under a directory whose name ends with a suffix.
     *
     * @param strPath
     *            directory to search
     * @param suffix
     *            file-name suffix to match
     * @return the matching files; empty if the directory cannot be listed
     */
    public static List<File> getFileList(String strPath, String suffix) {
        List<File> filelist = new ArrayList<File>();
        getFileList(strPath, suffix, filelist);
        return filelist;
    }

    /**
     * Recursively appends all files under a directory whose name ends with a suffix.
     *
     * @param strPath
     *            directory to search
     * @param suffix
     *            file-name suffix to match
     * @param fileList
     *            list that receives the matching files
     */
    public static void getFileList(String strPath, String suffix, List<File> fileList) {
        File[] files = new File(strPath).listFiles();
        if (files == null) {
            // listFiles() yields null when the path is not a listable directory.
            return;
        }
        for (File entry : files) {
            if (entry.isDirectory()) {
                // Descend into sub-directories.
                getFileList(entry.getAbsolutePath(), suffix, fileList);
            } else if (entry.getName().endsWith(suffix)) {
                fileList.add(entry);
            }
        }
    }

    /**
     * Guesses the charset of a file.
     *
     * <p>All detection state is local to the call, so the result for one file can
     * never leak into the result for the next one.
     *
     * @param file the file to inspect
     * @return {@code "ASCII"} if every byte read was reported as ASCII; otherwise the
     *         charset reported by the detector, or the first probable charset when
     *         the detector reported none; {@code null} if there is no candidate
     * @throws IOException if the file cannot be read
     */
    private static String guessFileCharset(File file) throws IOException {
        // Single-element holder so the anonymous observer can hand the result back.
        final String[] reported = new String[1];

        nsDetector det = new nsDetector();
        det.Init(new nsICharsetDetectionObserver() {
            public void Notify(String charset) {
                reported[0] = charset;
            }
        });

        boolean isAscii = true;
        boolean done = false;
        BufferedInputStream bis = new BufferedInputStream(new FileInputStream(file));
        try {
            byte[] buf = new byte[1024];
            int len;
            while (!done && (len = bis.read(buf, 0, buf.length)) != -1) {
                if (isAscii) {
                    isAscii = det.isAscii(buf, len);
                }
                // Deliberately not an "else": the chunk that first turns out to be
                // non-ASCII must itself be fed to the detector, otherwise a file that
                // fits in one buffer is never analysed at all.
                if (!isAscii && !done) {
                    done = det.DoIt(buf, len, false);
                }
            }
        } finally {
            closeAndReport(bis);
        }
        det.DataEnd();

        if (isAscii) {
            return "ASCII";
        }
        if (reported[0] != null) {
            return reported[0];
        }
        // Nothing was reported: fall back to the first probable charset, if any.
        String[] prob = det.getProbableCharsets();
        return prob.length > 0 ? prob[0] : null;
    }

    /**
     * Scans a directory tree and converts every file detected as GB2312 to UTF-8.
     *
     * <p>The original file is deleted only after its UTF-8 copy was written
     * successfully.
     *
     * @param args optional: {@code args[0]} is the root directory (default
     *             {@code F:\ppppppppp}), {@code args[1]} the file suffix (default
     *             {@code .sql})
     */
    public static void main(String[] args) {
        String root = args.length > 0 ? args[0] : DEFAULT_ROOT;
        String suffix = args.length > 1 ? args[1] : DEFAULT_SUFFIX;

        List<File> files = getFileList(root, suffix);
        for (int i = 0; i < files.size(); i++) {
            File file = files.get(i).getAbsoluteFile();
            String charset = null;
            try {
                System.out.print("正在獲取第"+i+"個文件的編碼"+"==="+file);
                charset = guessFileCharset(file);
            } catch (IOException e) {
                System.err.println("獲取文件編碼發生異常!");
            }
            System.out.println("====="+file + "[" + charset + "]");
            if ("GB2312".equals(charset)) {
                System.out.println("正在獲取第"+i+"個文件的轉化"+"==="+file);
                try {
                    convertGbkToUtf8(file.getPath());
                    // Only remove the source once the UTF-8 copy is safely on disk.
                    if (!file.delete()) {
                        System.err.println("刪除原文件失敗: " + file);
                    }
                } catch (IOException e) {
                    System.err.println("文件轉碼失敗,保留原文件: " + file);
                    e.printStackTrace();
                }
            }
        }
        System.out.println("文件總數:"+files.size());
    }
}

pom文件:

 <!-- jchardet: listed here as the dependency for the org.mozilla.intl.chardet classes imported by the Java code above -->
 <dependency>
            <groupId>net.sourceforge.jchardet</groupId>
            <artifactId>jchardet</artifactId>
            <version>1.0</version>
        </dependency>

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯繫本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM