JAVA使用NIO技術按行讀寫大文件並且完美解決中文亂碼問題

本文轉載自查看原文 2020-01-15 16:22 1046

假設我們一次讀取的字節是從下圖的start到end，因為結尾是漢字，而一個漢字由多個字節組成(GBK編碼下為2個字節)，所以有幾率只讀到該漢字的前半部分；此時若直接把這些字節轉成字符串，就會出現亂碼。

解決方法如下：將第9行這半行(第9行陰影的部分)跟上一次讀取留下來的半行(第9行沒陰影的部分)按順序存放在字節數組，然後轉成字符串；中間第10行到第17行正常轉換成字符串；第18行這半行(第18行陰影的部分)留著跟下一次讀取的第1行(第18行沒陰影的部分)連接成一行。因為是先拼接成字節數組再轉字符串，因此不會出現亂碼的情況。

package com.chillax.imp;  
  
import java.io.File;  
import java.io.IOException;  
import java.io.RandomAccessFile;  
import java.nio.ByteBuffer;  
import java.nio.channels.FileChannel;  
import java.util.ArrayList;  
import java.util.Date;  
import java.util.List;  
  
/**
 * Reads a large text file line by line through NIO channels and rewrites
 * every line, UTF-8 encoded and terminated by a single LF, to another file.
 *
 * <p>The input is consumed in fixed-size byte chunks. A chunk may end in the
 * middle of a line, or even in the middle of a multi-byte character, so the
 * bytes of an unfinished line are carried over as raw bytes and only decoded
 * once the complete line is available. Decoding whole lines only is what
 * prevents garbled characters.
 *
 * @author Chillax
 */
public class NIO {

    /** Byte value of the line-feed character. */
    private static final byte LF = 10;

    /** Byte value of the carriage-return character. */
    private static final byte CR = 13;

    /** Charset name used to decode the input file. */
    private static final String SOURCE_ENCODING = "GBK";

    /** Charset name used to encode the output file. */
    private static final String TARGET_ENCODING = "UTF-8";

    /** Terminator appended to every line that is written out. */
    private static final String LINE_SEPARATOR = "\n";

    /**
     * Converts a fixed input file and prints the start and end timestamps.
     *
     * @param args ignored
     * @throws Exception if a file cannot be opened or a channel cannot be closed
     */
    public static void main(String args[]) throws Exception {

        int bufSize = 1000000; // number of bytes fetched per read
        File fin = new File("D:\\test\\20160622_627975.txt");    // input file
        File fout = new File("D:\\test\\20160622_627975_1.txt"); // output file
        Date startDate = new Date();

        // try/finally guarantees both channels are released even when the
        // conversion fails; closing a channel also closes its RandomAccessFile.
        FileChannel fcin = new RandomAccessFile(fin, "r").getChannel();
        try {
            FileChannel fcout = new RandomAccessFile(fout, "rws").getChannel();
            try {
                ByteBuffer rBuffer = ByteBuffer.allocate(bufSize);
                ByteBuffer wBuffer = ByteBuffer.allocateDirect(bufSize);

                readFileByLine(bufSize, fcin, rBuffer, fcout, wBuffer);
                Date endDate = new Date();

                System.out.print(startDate+"|"+endDate); // crude timing output
            } finally {
                fcout.close();
            }
        } finally {
            fcin.close();
        }
    }

    /**
     * Copies {@code fcin} to {@code fcout} one line at a time, decoding each
     * line as GBK and handing it to {@link #writeFileByLine} followed by a
     * single LF.
     *
     * <p>Lines are recognised by LF; a CR immediately preceding the LF (Windows
     * line endings) is dropped, also when the CR and the LF arrive in different
     * chunks. A final line without a terminating LF is written as well. Lines
     * are streamed and never accumulated, so memory use is bounded by the
     * buffer size plus the longest line.
     *
     * <p>An {@link IOException} raised while reading or decoding is printed to
     * standard error and ends the copy; it is not propagated.
     *
     * @param bufSize unused; the chunk size is the capacity of {@code rBuffer}
     * @param fcin    channel to read from
     * @param rBuffer scratch buffer for reading; must have a non-zero capacity
     * @param fcout   channel to write to
     * @param wBuffer passed through to {@link #writeFileByLine}
     * @throws IllegalArgumentException if {@code rBuffer} has zero capacity
     */
    public static void readFileByLine(int bufSize, FileChannel fcin,
            ByteBuffer rBuffer, FileChannel fcout, ByteBuffer wBuffer) {
        if (rBuffer.capacity() == 0) {
            // read() would return 0 forever and the loop would never end.
            throw new IllegalArgumentException("rBuffer must have a non-zero capacity");
        }
        try {
            // Raw bytes of the line that was still incomplete when the previous
            // chunk ended. They stay undecoded so that a character split across
            // two chunks is reassembled before it is turned into a String.
            byte[] pending = new byte[0];

            while (fcin.read(rBuffer) != -1) {
                // Copy everything from index 0 up to the write position.
                rBuffer.flip();
                byte[] chunk = new byte[rBuffer.remaining()];
                rBuffer.get(chunk);
                rBuffer.clear();

                int lineStart = 0;
                for (int i = 0; i < chunk.length; i++) {
                    if (chunk[i] != LF) {
                        continue;
                    }
                    // LF (0x0A) and CR (0x0D) never occur inside a GBK or UTF-8
                    // multi-byte sequence, so scanning raw bytes is safe.
                    byte[] lineBytes = concat(pending, chunk, lineStart, i - lineStart);
                    pending = new byte[0];
                    writeFileByLine(fcout, wBuffer, decodeLine(lineBytes) + LINE_SEPARATOR);
                    lineStart = i + 1;
                }
                // Keep the unfinished tail; when the chunk contained no LF at
                // all this appends the whole chunk to what was already pending.
                pending = concat(pending, chunk, lineStart, chunk.length - lineStart);
            }

            if (pending.length > 0) { // last line had no terminating LF
                writeFileByLine(fcout, wBuffer, decodeLine(pending) + LINE_SEPARATOR);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * Appends {@code line}, encoded as UTF-8, to the end of {@code fcout}.
     *
     * <p>The data is written at the channel's current size, so existing
     * content is never overwritten. An {@link IOException} is printed to
     * standard error and not propagated.
     *
     * @param fcout   channel to append to
     * @param wBuffer unused; kept so existing callers keep compiling
     * @param line    text to write, including any line terminator
     */
    public static void writeFileByLine(FileChannel fcout, ByteBuffer wBuffer,
            String line) {
        try {
            ByteBuffer encoded = ByteBuffer.wrap(line.getBytes(TARGET_ENCODING));
            long position = fcout.size();
            // A single write() is allowed to transfer fewer bytes than asked.
            while (encoded.hasRemaining()) {
                position += fcout.write(encoded, position);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Returns a new array holding {@code head} followed by {@code length} bytes of {@code tail} starting at {@code offset}. */
    private static byte[] concat(byte[] head, byte[] tail, int offset, int length) {
        byte[] joined = new byte[head.length + length];
        System.arraycopy(head, 0, joined, 0, head.length);
        System.arraycopy(tail, offset, joined, head.length, length);
        return joined;
    }

    /** Decodes one complete line from the source encoding, dropping a single trailing CR if present. */
    private static String decodeLine(byte[] lineBytes) throws IOException {
        int length = lineBytes.length;
        if (length > 0 && lineBytes[length - 1] == CR) {
            length--;
        }
        return new String(lineBytes, 0, length, SOURCE_ENCODING);
    }
}

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 JAVA之NIO按行讀取大文件 Java IO流讀取中文文本文件亂碼問題，完美解決完美解決Informix的中文亂碼問題 java大文件讀寫操作，java nio 之MappedByteBuffer，高效文件/內存映射完美解決PHP中文亂碼 Mysql 中文亂碼問題完美解決方案 js url傳值中文亂碼完美解決(JAVA) java filechannel大文件的讀寫 Java寫Xml文件中文亂碼問題【JAVA】讀取txt文件中文亂碼問題