C#快速隨機按行讀取大型文本文件


下面是我實現的一個數據文件隨機讀取類,可以隨機讀取大型文本文件的某一行。在我的機器上,對一個130MB的文本文件讀取第200000行,耗時從傳統做法的400ms降低到了3ms。
對文本文件進行讀取時,一般采用ReadLine()進行逐行讀取。在這種情況下,C#內的FileStream和BufferedStream類處理綽綽有余了。它不會將整個文件全部讀入,而是有緩沖的讀。但是,要想隨機讀取某一行,在行數據長度不統一的情況下,如果每次都這樣從頭遍歷到指定行,其效率顯然是很低下的。
當然,代價也是有的,引入了第一次打開文件的打開時間,且占用了少部分內存(占用多少是可以設置的,當然占得越小速度也越慢,但最大值也比全部讀入要小很多)。

(對網絡代碼進行部分改寫)

using System;
using System.Collections.Generic;
using System.Text;
using System.Collections;
using System.Threading;
using System.IO;

namespace DataBuffer
{
    public static class FileConfig
    {
        public static int STREAM_BUFFER_SIZE = 1024000;
        public static int MAP_DISTANCE = 10;
    }

    public class DataFile
    {
        ///
        /// 數據文件名
        ///
        public string fileName = "";
        ///
        /// 初始化讀取完標志
        ///
        public bool done = false;

        ///
        /// 當前流位置
        ///
        public long Position = 0;

        ///
        /// 文件頭部信息
        ///
        private Hashtable head = new Hashtable();
        public Hashtable Head { get { return head; } set { head = value; } }

        ///
        /// 文件地圖
        ///       
        private ArrayList map = new ArrayList();
        public ArrayList Map { get { return map; } set { map = value; } }

        ///
        /// 文件數據行行數
        ///       
        private long lines = 0;
        public long Lines { get { return lines; } set { lines = value; } }
    }

    public class DataBuffer
    {
        private FileStream fs = null;
        private BufferedStream bs = null;
        private StreamReader sr = null;
        private StreamWriter sw = null;
        ///
        /// 文件信息數據結構
        ///
        public DataFile dataFile = new DataFile();

        public DataBuffer(string name)
        {
            dataFile.fileName = name;
        }

        ///
        /// 打開文件
        ///
        public bool Open()
        {
            try
            {
                //初始化各流
                fs = new FileStream(dataFile.fileName, FileMode.Open, FileAccess.ReadWrite);
                bs = new BufferedStream(fs, FileConfig.STREAM_BUFFER_SIZE);
                sr = new StreamReader(fs);
                sw = new StreamWriter(fs);
                Thread initFile = new Thread(new ThreadStart(InitDataFile));
                initFile.Start();
                return true;
            }
            catch (Exception ee)
            {
                ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "文件打開");
                return false;
            }
        }

        private void InitDataFile()
        {
            //另開一個讀取流
            BufferedStream bs = new BufferedStream(fs);
            StreamReader sr = new StreamReader(bs);

            //讀入數據文件頭信息。共14行
            string thisLine = NextLine(ref sr);
            dataFile.Head.Add("Subject", thisLine.Substring(11));

            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Date", thisLine.Substring(8));

            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Time", thisLine.Substring(8));

            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Channels", thisLine.Substring(12));

            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Rate", thisLine.Substring(8));

            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Type", thisLine.Substring(8));

            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Rows", thisLine.Substring(8));

            thisLine = NextLine(ref sr);
            thisLine = NextLine(ref sr);
            dataFile.Head.Add("Electrode Labels", thisLine);
            thisLine = NextLine(ref sr);
            thisLine = NextLine(ref sr);
            thisLine = NextLine(ref sr);
            thisLine = NextLine(ref sr);
            thisLine = NextLine(ref sr);
            //降低自己的優先級
            Thread.CurrentThread.Priority = ThreadPriority.BelowNormal;

            //數行數,建立地圖
            long lines = 1;
            //在地圖中加入首條數據的位置信息
            dataFile.Map.Add(dataFile.Position);
            //順序建立文件地圖
            while (!sr.EndOfStream)
            {
                thisLine = NextLine(ref sr);
                if ((++lines) % FileConfig.MAP_DISTANCE == 0)
                {
                    dataFile.Map.Add(dataFile.Position);
                }
            }
            dataFile.Lines = lines;
            dataFile.done = true;
        }

        ///
        /// 文件關閉
        ///
        public bool Close()
        {
            try
            {
                //順序關閉各流
                sw.Close();
                sr.Close();
                bs.Close();
                fs.Close();
                return true;
            }
            catch (Exception ee)
            {
                ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "文件關閉");
                return false;
            }
        }

        ///
        /// 順序讀取下一行。效率低不建議大規模使用,只在打開文件的時候使用一次
        ///
        ///
        public string NextLine(ref StreamReader sr)
        {
            string next = sr.ReadLine();
            //+2是指Windows換行回車。Linux下要改為+1
            dataFile.Position += next.Length + 2;
            return next;
        }

        //指定的目標行內容
        public string ReadLine(long line)
        {
            try
            {
                //如果載入完畢
                if (dataFile.done)
                {
                    //確定數據塊索引號
                    int index = (int)line / FileConfig.MAP_DISTANCE;
                    //移動到指定位置
                    bs.Seek(long.Parse(dataFile.Map[index].ToString()), SeekOrigin.Begin);
                    //創建流讀取器
                    sr = new StreamReader(bs);
                    //移動到指定行
                    for (int i = 1; i <= (line - index * FileConfig.MAP_DISTANCE); i++)
                    {
                        sr.ReadLine();
                    }
                    //返回指定行的值
                    return sr.ReadLine();
                }
                else
                {
                    return "";
                }
            }
            catch (Exception ee)
            {
                ErrorHandler.ErrorHandler eh = new ErrorHandler.ErrorHandler(ee, "文件讀取");
                return "";
            }
        }
    }

}


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM