上次讀取 Excel 時發現速度還是很慢(結果集為 DataTable),後來研究了一下,發現 CSV 文件的讀寫效率很高,特別是針對大文件的時候。話不多說,上代碼!
本機配置:win10 i5900F 16G 500G固態
1、csv文件幫助類

/// <summary>
/// Reads and writes comma-separated files to and from a <see cref="DataTable"/>.
/// Fields are quoted following the usual CSV convention (RFC 4180 style): a field
/// that contains a comma, a double quote or a line break is wrapped in double
/// quotes and embedded quotes are doubled. The reader understands the same format.
/// </summary>
public static class CsvHelper
{
    private const char Delimiter = ',';
    private const char Quote = '"';

    // A field containing any of these characters must be quoted when written.
    private static readonly char[] CharsRequiringQuotes = { ',', '"', '\r', '\n' };

    // Returned by ReadRecord for a physical line that is empty or whitespace only.
    private static readonly string[] BlankRecord = new string[0];

    /// <summary>
    /// Creates (or overwrites) a CSV file and writes the table to it: one header
    /// line with the column names followed by one line per row.
    /// </summary>
    /// <param name="dt">Table to write.</param>
    /// <param name="fullFileName">Full path of the file to create.</param>
    /// <returns>Always <c>true</c>; failures are reported by exception.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="dt"/> is null.</exception>
    public static Boolean SaveCSV(DataTable dt, string fullFileName)
    {
        if (dt == null)
        {
            throw new ArgumentNullException("dt");
        }

        int columnCount = dt.Columns.Count;

        // In a single-column table an empty value would otherwise produce a blank
        // line, which the reader skips; writing it as "" keeps the row.
        bool quoteEmptyValues = columnCount == 1;

        // One reusable buffer instead of O(n^2) string concatenation per line.
        System.Text.StringBuilder line = new System.Text.StringBuilder();

        // 'using' guarantees both streams are closed even when a write fails.
        // NOTE(review): Encoding.Default is machine dependent; kept so existing
        // files and consumers keep working.
        using (FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Create, System.IO.FileAccess.Write))
        using (StreamWriter sw = new StreamWriter(fs, System.Text.Encoding.Default))
        {
            // Header line: column names.
            for (int i = 0; i < columnCount; i++)
            {
                if (i > 0)
                {
                    line.Append(Delimiter);
                }
                AppendField(line, dt.Columns[i].ColumnName, false);
            }
            sw.WriteLine(line.ToString());

            // Data lines.
            foreach (DataRow row in dt.Rows)
            {
                line.Length = 0;
                for (int j = 0; j < columnCount; j++)
                {
                    if (j > 0)
                    {
                        line.Append(Delimiter);
                    }
                    AppendField(line, row[j].ToString(), quoteEmptyValues);
                }
                sw.WriteLine(line.ToString());
            }
        }

        return true;
    }

    /// <summary>
    /// Reads a whole CSV file whose first line is a header row.
    /// </summary>
    /// <param name="fullFileName">Full path of the file to read.</param>
    /// <returns>A table with one string column per header field.</returns>
    public static DataTable ReadCSV(string fullFileName)
    {
        return ReadCSV(fullFileName, 0, 0, 0, 0, true);
    }

    /// <summary>
    /// Reads a rectangular region of a CSV file.
    /// </summary>
    /// <param name="fullFileName">Full path of the file to read.</param>
    /// <param name="firstRow">1-based physical line to start at; 0 or 1 starts at the first line.</param>
    /// <param name="firstColumn">0-based index of the first column to keep; negative values are treated as 0.</param>
    /// <param name="getRows">Maximum number of data rows to return; 0 (or less) means all rows.</param>
    /// <param name="getColumns">Number of columns to keep; 0 means all columns from <paramref name="firstColumn"/> on.</param>
    /// <param name="haveTitleRow">
    /// When true the first record supplies the column names and is not returned as data;
    /// when false columns are named "COL" + index and the first record is a data row.
    /// </param>
    /// <returns>
    /// The requested region. Blank lines are skipped, records shorter than the first
    /// record leave the missing cells unset, and extra fields are ignored.
    /// </returns>
    /// <exception cref="DuplicateNameException">The header row contains the same name twice.</exception>
    public static DataTable ReadCSV(string fullFileName, Int16 firstRow = 0, Int16 firstColumn = 0, Int16 getRows = 0, Int16 getColumns = 0, bool haveTitleRow = true)
    {
        DataTable dt = new DataTable();

        using (FileStream fs = new FileStream(fullFileName, System.IO.FileMode.Open, System.IO.FileAccess.Read))
        using (StreamReader sr = new StreamReader(fs, System.Text.Encoding.Default))
        {
            // Skip the physical lines that precede the requested start line.
            for (int i = 1; i < firstRow; i++)
            {
                if (sr.ReadLine() == null)
                {
                    return dt;
                }
            }

            int startColumn = Math.Max((int)firstColumn, 0);
            int endColumn = 0; // exclusive; fixed once the first record has been seen
            bool columnsCreated = false;
            int rowsRead = 0;

            string[] fields;
            while ((fields = ReadRecord(sr)) != null)
            {
                if (fields.Length == 0)
                {
                    continue; // blank line
                }

                if (!columnsCreated)
                {
                    columnsCreated = true;

                    // Never index past the fields that are actually present.
                    endColumn = getColumns == 0
                        ? fields.Length
                        : Math.Min(startColumn + getColumns, fields.Length);

                    for (int i = startColumn; i < endColumn; i++)
                    {
                        dt.Columns.Add(new DataColumn(haveTitleRow ? fields[i] : "COL" + i.ToString()));
                    }

                    if (haveTitleRow)
                    {
                        continue;
                    }
                }

                DataRow dr = dt.NewRow();
                int available = Math.Min(endColumn, fields.Length);
                for (int j = startColumn; j < available; j++)
                {
                    dr[j - startColumn] = fields[j];
                }
                dt.Rows.Add(dr);

                rowsRead++;
                if (getRows > 0 && rowsRead >= getRows)
                {
                    break;
                }
            }
        }

        return dt;
    }

    // Appends one field to the line buffer, quoting it when the content requires it.
    private static void AppendField(System.Text.StringBuilder target, string value, bool quoteIfEmpty)
    {
        bool mustQuote = value.Length == 0
            ? quoteIfEmpty
            : value.IndexOfAny(CharsRequiringQuotes) >= 0;

        if (!mustQuote)
        {
            target.Append(value);
            return;
        }

        target.Append(Quote);
        target.Append(value.Replace("\"", "\"\""));
        target.Append(Quote);
    }

    // Reads one logical CSV record. Returns null at end of input, BlankRecord for a
    // blank line, otherwise the fields (empty fields are kept so columns stay aligned).
    private static string[] ReadRecord(StreamReader reader)
    {
        string line = reader.ReadLine();
        if (line == null)
        {
            return null;
        }
        if (line.Trim().Length == 0)
        {
            return BlankRecord;
        }

        // Fast path for the common case: no quoting anywhere on the line.
        if (line.IndexOf(Quote) < 0)
        {
            return line.Split(Delimiter);
        }

        System.Collections.Generic.List<string> fields = new System.Collections.Generic.List<string>();
        System.Text.StringBuilder field = new System.Text.StringBuilder();
        bool inQuotes = false;
        int pos = 0;

        while (true)
        {
            if (pos == line.Length)
            {
                if (!inQuotes)
                {
                    break;
                }

                // A quoted field spans several physical lines. ReadLine strips the
                // terminator, so the platform line break is re-inserted.
                string continuation = reader.ReadLine();
                if (continuation == null)
                {
                    break; // unterminated quote at end of file: keep what was read
                }
                field.Append(Environment.NewLine);
                line = continuation;
                pos = 0;
                continue;
            }

            char ch = line[pos++];
            if (inQuotes)
            {
                if (ch != Quote)
                {
                    field.Append(ch);
                }
                else if (pos < line.Length && line[pos] == Quote)
                {
                    field.Append(Quote); // doubled quote = literal quote
                    pos++;
                }
                else
                {
                    inQuotes = false;
                }
            }
            else if (ch == Delimiter)
            {
                fields.Add(field.ToString());
                field.Length = 0;
            }
            else if (ch == Quote && field.Length == 0)
            {
                inQuotes = true; // a quote only opens a quoted field at field start
            }
            else
            {
                field.Append(ch);
            }
        }

        fields.Add(field.ToString());
        return fields.ToArray();
    }
}
2、使用
2.1、創建csv文件,數據量為100W行,21列

/// <summary>
/// Builds an in-memory table of 1,000,000 rows (an auto-increment ID column plus
/// the string columns p1..p20), writes it to a CSV file with CsvHelper.SaveCSV and
/// logs the resulting file size and the time spent writing the file.
/// </summary>
public void CSV_Create()
{
    const int stringColumnCount = 20;
    const int rowCount = 1000000;

    string filePath = @"C:\Users\Administrator\Desktop\大數據.csv";

    #region Fill the DataTable
    DataTable tblDatas = new DataTable("Datas");

    DataColumn idColumn = tblDatas.Columns.Add("ID", typeof(int));
    idColumn.AutoIncrement = true;   // value is assigned automatically
    idColumn.AutoIncrementSeed = 1;  // start at 1
    idColumn.AutoIncrementStep = 1;  // step by 1
    idColumn.AllowDBNull = false;

    for (int i = 1; i <= stringColumnCount; i++)
    {
        tblDatas.Columns.Add("p" + i, typeof(string));
    }

    // Rows are created only inside the loop: a row that is created and then
    // discarded would use up an auto-increment value.
    for (int i = 0; i < rowCount; i++)
    {
        // Every string column of a row carries the same text.
        string value = "大話西游大話西游大話西游大話西游" + i;

        DataRow newRow = tblDatas.NewRow();
        for (int c = 1; c <= stringColumnCount; c++)
        {
            // Ordinal 0 is the ID column; ordinals 1..20 are p1..p20.
            newRow[c] = value;
        }
        tblDatas.Rows.Add(newRow);
    }
    #endregion

    // Only the file write is timed, not the table construction above.
    Stopwatch sw = Stopwatch.StartNew();

    CsvHelper.SaveCSV(tblDatas, filePath);

    sw.Stop();

    System.IO.FileInfo fileInfo = new System.IO.FileInfo(filePath);
    log.Info("生成.csv文件," + filePath + ",文件大小" + System.Math.Ceiling((fileInfo.Length / 1024.0) / 1024) + " M" + ",耗時:" + sw.Elapsed);
}
耗時約20秒,文件大小約750M。
2.2、讀csv文件

/// <summary>
/// Loads the benchmark CSV file into a DataTable through CsvHelper.ReadCSV and
/// logs the number of rows (in units of ten thousand) with the elapsed time.
/// </summary>
public void CSV_Read()
{
    Stopwatch timer = Stopwatch.StartNew();
    string csvPath = @"C:\Users\Administrator\Desktop\大數據.csv";

    DataTable table = CsvHelper.ReadCSV(csvPath);

    // Integer division: the count is reported in whole units of 10,000 rows.
    int tenThousands = table.Rows.Count / 10000;

    string message = csvPath + ",文件讀取完成,數據條數" + tenThousands + "萬,耗時:" + timer.Elapsed;
    log.Info(message);
}
生成DataTable類型的結果集,耗時10秒左右,測試結果的日志內容如下: