/// <summary>
/// Reads CSV text from a file or stream into rows of string fields, and offers static helpers that
/// write rows back to a CSV file.
/// <para>Format rules implemented by this class:</para>
/// <para>1. A field may hold any text, including tabs, line breaks and nothing at all. Make sure the
/// encoding handed to the reader matches the data.</para>
/// <para>2. The field separator is configurable. Rows are read with StreamReader.ReadLine and written
/// with a \r\n terminator; a line break inside a quoted field is always rebuilt as \r\n. The quote
/// character is always the double quote.</para>
/// <para>3. A field is treated as quoted only when its very first character is a double quote; the
/// closing quote must be the last character of the field. A lone double quote in the middle of a
/// quoted field is a format error and makes the reader throw.</para>
/// <para>4. Inside a quoted field a literal double quote is written as two double quotes. In an
/// unquoted field a double quote that is not the first character is taken literally. Excel quotes
/// every field that contains a double quote; <see cref="IsSaveAsExcel"/> selects that behaviour
/// when saving.</para>
/// <para>5. A row does not end with a separator; a trailing separator adds one more empty field.</para>
/// <para>6. An empty line is a row with one empty field, and a line holding only a separator is a row
/// with two empty fields, so a parsed row never has zero fields.</para>
/// </summary>
public sealed class CsvFileHelper : IDisposable
{
    #region Members

    // Line break restored inside a quoted field that spans several physical lines. The parsers also
    // leave it at the end of a row's last field to signal that the field is still open.
    private const string LineBreak = "\r\n";

    private Stream _stream;
    private StreamReader _streamReader;
    private Encoding _encoding;
    private Type _type = Type.File;
    private bool _trimColumns = false;
    private char _csvSeparator = ',';

    #endregion Members

    #region Properties

    /// <summary>
    /// Gets or sets whether column values should be trimmed
    /// </summary>
    public bool TrimColumns
    {
        get { return _trimColumns; }
        set { _trimColumns = value; }
    }

    /// <summary>
    /// Gets the kind of data source this instance was constructed from.
    /// </summary>
    public Type DataSouceType
    {
        get { return _type; }
    }

    /// <summary>
    /// Gets or sets the separator used when reading (default value is ,)
    /// </summary>
    public char CsvSeparator
    {
        get { return _csvSeparator; }
        set { _csvSeparator = value; }
    }

    #endregion Properties

    #region Enums

    /// <summary>
    /// Kind of data source a reader instance works on.
    /// </summary>
    public enum Type
    {
        File,
        Stream
    }

    #endregion Enums

    #region Methods

    /// <summary>
    /// Initialises the reader to work from a file, using Encoding.Default.
    /// </summary>
    /// <param name="filePath">File path</param>
    public CsvFileHelper(string filePath) : this(filePath, Encoding.Default)
    {
    }

    /// <summary>
    /// Initialises the reader to work from a file.
    /// </summary>
    /// <param name="filePath">File path</param>
    /// <param name="encoding">Encoding; Encoding.Default is used when null</param>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    public CsvFileHelper(string filePath, Encoding encoding)
    {
        _type = Type.File;
        if (!File.Exists(filePath))
        {
            throw new FileNotFoundException(string.Format("The file '{0}' does not exist.", filePath));
        }

        // Opened read-only with FileShare.ReadWrite so a file that another process holds open for
        // writing can still be read.
        _stream = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
        _encoding = (encoding ?? Encoding.Default);
        _streamReader = new StreamReader(_stream, _encoding);
    }

    /// <summary>
    /// Initialises the reader to work from an existing stream, using Encoding.Default.
    /// </summary>
    /// <param name="stream">Stream</param>
    public CsvFileHelper(Stream stream) : this(stream, Encoding.Default)
    {
    }

    /// <summary>
    /// Initialises the reader to work from an existing stream. The stream is disposed together with
    /// this instance.
    /// </summary>
    /// <param name="stream">Stream</param>
    /// <param name="encoding">Encoding; Encoding.Default is used when null</param>
    /// <exception cref="ArgumentNullException">The stream is null.</exception>
    public CsvFileHelper(Stream stream, Encoding encoding)
    {
        _type = Type.Stream;
        if (stream == null)
        {
            // The single-string constructor would treat the message as the parameter name.
            throw new ArgumentNullException("stream", "The supplied stream is null.");
        }

        _stream = stream;
        if (_stream.CanSeek)
        {
            // Only seekable streams can be rewound; others are read from their current position.
            _stream.Position = 0;
        }
        _encoding = (encoding ?? Encoding.Default);
        _streamReader = new StreamReader(_stream, _encoding);
    }

    /// <summary>
    /// Initialises the reader to work from an existing stream (with the Separator char)
    /// </summary>
    /// <param name="stream">Stream</param>
    /// <param name="encoding">Encoding</param>
    /// <param name="yourSeparator"> the Separator char</param>
    public CsvFileHelper(Stream stream, Encoding encoding, char yourSeparator) : this(stream, encoding)
    {
        CsvSeparator = yourSeparator;
    }

    /// <summary>
    /// Splits one physical line that starts a new row into fields.
    /// </summary>
    /// <param name="line">Line text without its line terminator</param>
    /// <returns>
    /// At least one field. When the last field is a quoted field that is not closed on this line,
    /// it ends with \r\n so the caller knows the next line continues it.
    /// </returns>
    /// <exception cref="FormatException">A lone double quote appears inside a quoted field.</exception>
    private List<string> ParseLine(string line)
    {
        StringBuilder columnBuilder = new StringBuilder();
        List<string> fields = new List<string>();
        bool inColumn = false; // currently inside a field
        bool inQuotes = false; // the current field was opened with a double quote

        for (int i = 0; i < line.Length; i++)
        {
            char character = line[i];

            if (!inColumn)
            {
                // First character of a field: a double quote opens a quoted field and is not content.
                inColumn = true;
                if (character == '"')
                {
                    inQuotes = true;
                    continue;
                }
            }

            if (inQuotes)
            {
                if (character == '"')
                {
                    if (i + 1 == line.Length)
                    {
                        // Closing quote at the very end of the line; the field is added after the loop.
                        inQuotes = false;
                        continue;
                    }

                    char next = line[i + 1];
                    if (next == _csvSeparator)
                    {
                        // Closing quote followed by a separator: the field ends, skip the separator.
                        inQuotes = false;
                        inColumn = false;
                        i++;
                    }
                    else if (next == '"')
                    {
                        // Escaped quote: consume the pair, one quote is appended below.
                        i++;
                    }
                    else
                    {
                        throw new FormatException(string.Format("[{0}]:格式錯誤,錯誤的雙引號轉義 near [{1}] ", "ParseLine", line));
                    }
                }
            }
            else if (character == _csvSeparator)
            {
                inColumn = false;
            }

            if (!inColumn)
            {
                // The field just ended; the current character (separator or closing quote) is not content.
                fields.Add(TrimColumns ? columnBuilder.ToString().Trim() : columnBuilder.ToString());
                columnBuilder.Length = 0;
            }
            else
            {
                columnBuilder.Append(character);
            }
        }

        if (inQuotes)
        {
            // The quoted field is still open at the end of the line (this includes a line that ends
            // right after the opening quote or after an escaped quote pair). Only the start may be
            // trimmed: the trailing line break is the continuation marker and must survive.
            string openText = columnBuilder.ToString();
            if (TrimColumns)
            {
                openText = openText.TrimStart();
            }
            fields.Add(openText + LineBreak);
        }
        else if (inColumn)
        {
            // A row does not end with a separator, so the last field is added here.
            fields.Add(TrimColumns ? columnBuilder.ToString().Trim() : columnBuilder.ToString());
        }
        else
        {
            // Either the line ended with a separator (one more empty field follows it) or the line
            // is empty (a row with one empty field).
            fields.Add("");
        }

        return fields;
    }

    /// <summary>
    /// Parses a physical line that continues a quoted field left open by the previous line.
    /// </summary>
    /// <param name="line">Line text without its line terminator</param>
    /// <returns>
    /// At least one element. The first is the text to append to the open field; any further elements
    /// are the remaining fields of the row. When the field is still open at the end of the line the
    /// last element ends with \r\n.
    /// </returns>
    /// <exception cref="FormatException">A lone double quote appears inside the quoted field.</exception>
    private List<string> ParseContinueLine(string line)
    {
        StringBuilder columnBuilder = new StringBuilder();
        List<string> fields = new List<string>();

        for (int i = 0; i < line.Length; i++)
        {
            char character = line[i];

            if (character == '"')
            {
                if (i + 1 == line.Length)
                {
                    // Closing quote at the end of the line: the open field ends here.
                    fields.Add(TrimColumns ? columnBuilder.ToString().TrimEnd() : columnBuilder.ToString());
                    return fields;
                }

                char next = line[i + 1];
                if (next == _csvSeparator)
                {
                    // Closing quote followed by a separator: the rest of the line is ordinary row text.
                    fields.Add(TrimColumns ? columnBuilder.ToString().TrimEnd() : columnBuilder.ToString());
                    fields.AddRange(ParseLine(line.Substring(i + 2)));
                    return fields;
                }

                if (next != '"')
                {
                    // A quote inside a quoted field must be doubled; a lone one is a format error.
                    throw new FormatException(string.Format("[{0}]:格式錯誤,錯誤的雙引號轉義 near [{1}]", "ParseContinueLine", line));
                }

                // Escaped quote: consume the pair, one quote is appended below.
                i++;
            }

            columnBuilder.Append(character);
        }

        // No closing quote on this line (also covers an empty line and a line ending in an escaped
        // quote pair): keep everything read so far and mark the field as still open.
        columnBuilder.Append(LineBreak);
        fields.Add(columnBuilder.ToString());
        return fields;
    }

    /// <summary>
    /// Tells whether the last field of a parsed line carries the "still open" marker.
    /// </summary>
    private static bool EndsWithOpenField(List<string> fields)
    {
        return fields.Count > 0
            && fields[fields.Count - 1].EndsWith(LineBreak, StringComparison.Ordinal);
    }

    /// <summary>
    /// Reads the whole source and returns its rows. A seekable source is read from its beginning on
    /// every call; a non-seekable one is read from its current position.
    /// </summary>
    /// <returns>One list of fields per CSV row</returns>
    /// <exception cref="ObjectDisposedException">The helper has been disposed.</exception>
    /// <exception cref="FormatException">The data contains a wrongly placed double quote.</exception>
    public List<List<string>> GetListCsvData()
    {
        if (_streamReader == null || _stream == null)
        {
            throw new ObjectDisposedException("CsvFileHelper");
        }

        if (_stream.CanSeek)
        {
            _stream.Position = 0;
            // The reader keeps its own buffer; without this a call made after an aborted read would
            // start with stale text.
            _streamReader.DiscardBufferedData();
        }

        List<List<string>> rows = new List<List<string>>();
        bool fieldStillOpen = false;
        bool isFirstLine = true;

        // ReadLine also splits at line breaks inside quoted fields; those pieces are glued back below.
        string line;
        while ((line = _streamReader.ReadLine()) != null)
        {
            if (isFirstLine)
            {
                isFirstLine = false;
                // NOTE(review): a rewound reader may not repeat its byte-order-mark detection, in
                // which case the mark would surface as U+FEFF in front of the first field - confirm.
                if (line.Length > 0 && line[0] == '\uFEFF')
                {
                    line = line.Substring(1);
                }
            }

            if (fieldStillOpen)
            {
                List<string> pieces = ParseContinueLine(line);
                fieldStillOpen = EndsWithOpenField(pieces);

                // The first piece belongs to the open field of the current row, the rest are new fields.
                List<string> currentRow = rows[rows.Count - 1];
                currentRow[currentRow.Count - 1] += pieces[0];
                pieces.RemoveAt(0);
                currentRow.AddRange(pieces);
            }
            else
            {
                List<string> row = ParseLine(line);
                fieldStillOpen = EndsWithOpenField(row);
                rows.Add(row);
            }
        }

        return rows;
    }

    /// <summary>
    /// Disposes the reader and the underlying stream. Calling it more than once is harmless.
    /// </summary>
    public void Dispose()
    {
        if (_streamReader != null)
        {
            _streamReader.Dispose();
            _streamReader = null;
        }
        if (_stream != null)
        {
            _stream.Dispose();
            _stream = null;
        }
    }

    #endregion

    #region StaticTool

    #region Accepted encodings
    // Consider using UTF-8 or Unicode (UTF-16) as the default encoding. Most other encodings are
    // either incomplete and turn many characters into "?", or behave slightly differently across
    // platforms.
    // A pasted copy of the output of the Encoding.GetEncodings documentation sample (code page
    // identifier, name and display/save flags) used to live here; call Encoding.GetEncodings() to
    // list the encodings available on the running platform.
    #endregion

    /// <summary>
    /// Sets the defaults used by the static save helpers: Excel-style quoting and UTF-8 without a
    /// byte order mark.
    /// </summary>
    static CsvFileHelper()
    {
        isSaveAsExcel = true;
        defaultEncoding = new System.Text.UTF8Encoding(false);
    }

    private static bool isSaveAsExcel;
    private static Encoding defaultEncoding;
    private static char csvSeparator = ',';

    /// <summary>
    /// Gets or sets the separator used when saving (default value is ,)
    /// </summary>
    public static char DefaultCsvSeparator
    {
        get { return csvSeparator; }
        set { csvSeparator = value; }
    }

    /// <summary>
    /// Gets or sets whether fields are quoted the way Excel does it. A double quote at the start of
    /// a field always forces quoting; one in the middle does not need it, but Excel quotes every
    /// field that contains a double quote wherever it appears.
    /// </summary>
    public static bool IsSaveAsExcel
    {
        get { return isSaveAsExcel; }
        set { isSaveAsExcel = value; }
    }

    /// <summary>
    /// Gets or sets the encoding used by the save overload that takes none. Pass an encoding
    /// instance configured without a byte order mark when the file must not start with one.
    /// </summary>
    public static Encoding DefaultEncoding
    {
        get { return defaultEncoding; }
        set { defaultEncoding = value; }
    }

    /// <summary>
    /// Writes every row of the data to the writer.
    /// </summary>
    private static void WriteCsvVeiw(List<List<string>> yourListCsvData, StreamWriter writer)
    {
        foreach (List<string> row in yourListCsvData)
        {
            WriteCsvLine(row, writer);
        }
    }

    /// <summary>
    /// Writes one row followed by \r\n. Null fields are written as empty fields and a null row as an
    /// empty line. Fields that do not need quoting are written trimmed.
    /// </summary>
    private static void WriteCsvLine(List<string> fields, StreamWriter writer)
    {
        StringBuilder rowBuilder = new StringBuilder();
        int fieldCount = (fields == null) ? 0 : fields.Count;

        for (int i = 0; i < fieldCount; i++)
        {
            // Rows parsed from a file never hold null, rows built by callers may.
            string field = fields[i];
            if (!string.IsNullOrEmpty(field))
            {
                bool hasQuote = field.IndexOf('"') >= 0;

                // Unquoted fields are trimmed below, so in non-Excel mode the test for a leading
                // quote has to look at the text as it would be written.
                bool quoteForcesQuoting = isSaveAsExcel
                    ? hasQuote
                    : field.TrimStart().StartsWith("\"", StringComparison.Ordinal);

                // Any line-break character, not only the \r\n pair, would split the row when read back.
                bool quotesRequired = quoteForcesQuoting
                    || field.IndexOf(csvSeparator) >= 0
                    || field.IndexOf('\r') >= 0
                    || field.IndexOf('\n') >= 0;

                if (quotesRequired)
                {
                    rowBuilder.Append('"');
                    rowBuilder.Append(hasQuote ? field.Replace("\"", "\"\"") : field);
                    rowBuilder.Append('"');
                }
                else
                {
                    rowBuilder.Append(field.Trim());
                }
            }

            if (i < fieldCount - 1)
            {
                rowBuilder.Append(csvSeparator);
            }
        }

        // Every row gets its terminator; without it consecutive rows run together on one line.
        rowBuilder.Append(LineBreak);
        writer.Write(rowBuilder.ToString());
    }

    /// <summary>
    /// Saves rows to a CSV file.
    /// </summary>
    /// <param name="yourFilePath">Target file path</param>
    /// <param name="yourDataSouse">Rows to write</param>
    /// <param name="isAppend">True to append to an existing file, false to create or overwrite</param>
    /// <param name="yourEncode">Encoding; <see cref="DefaultEncoding"/> is used when null</param>
    /// <exception cref="ArgumentNullException">The path or the data is null.</exception>
    /// <exception cref="FileNotFoundException">Append mode was requested but the file does not exist.</exception>
    /// <exception cref="DirectoryNotFoundException">The directory of a new file does not exist.</exception>
    public static void SaveCsvFile(string yourFilePath, List<List<string>> yourDataSouse, bool isAppend, Encoding yourEncode)
    {
        // Validate everything before touching the file system, so a bad call neither creates nor
        // truncates the target file.
        if (yourFilePath == null)
        {
            throw new ArgumentNullException("yourFilePath");
        }
        if (yourDataSouse == null)
        {
            throw new ArgumentNullException("yourDataSouse", "your DataSouse is null");
        }

        if (!File.Exists(yourFilePath))
        {
            if (isAppend)
            {
                throw new FileNotFoundException("在附加模式下,文件路徑必須存在", yourFilePath);
            }

            // A bare file name has no directory part and is created in the current directory.
            string directory = Path.GetDirectoryName(yourFilePath);
            if (!string.IsNullOrEmpty(directory) && !Directory.Exists(directory))
            {
                throw new DirectoryNotFoundException("文件路徑或目錄不存在");
            }
        }

        Encoding encoding = yourEncode ?? defaultEncoding ?? new UTF8Encoding(false);

        // Opening throws when another process holds the file open without sharing write access;
        // callers should be prepared for that.
        using (FileStream fileStream = new FileStream(yourFilePath, isAppend ? FileMode.Append : FileMode.Create, FileAccess.Write, FileShare.ReadWrite))
        using (StreamWriter writer = new StreamWriter(fileStream, encoding))
        {
            WriteCsvVeiw(yourDataSouse, writer);
        }
    }

    /// <summary>
    /// Saves rows to a CSV file, overwriting it, using <see cref="DefaultEncoding"/>.
    /// </summary>
    /// <param name="yourFilePath">Target file path</param>
    /// <param name="yourDataSouse">Rows to write</param>
    public static void SaveCsvFile(string yourFilePath, List<List<string>> yourDataSouse)
    {
        SaveCsvFile(yourFilePath, yourDataSouse, false, defaultEncoding);
    }

    /// <summary>
    /// Opens the file for exclusive read/write access, creating it when it does not exist.
    /// </summary>
    /// <param name="filePath">File path</param>
    /// <returns>The open stream, or null when the file could not be opened for any reason</returns>
    public static Stream OpenFile(string filePath)
    {
        try
        {
            return new FileStream(filePath, FileMode.OpenOrCreate, FileAccess.ReadWrite, FileShare.None);
        }
        catch (Exception)
        {
            // Best effort by design: callers test the result for null instead of catching.
            return null;
        }
    }

    #endregion
}
生成出來的CSV文件Apple電腦可以識別