// Extracts the text of every page of "xxx.pdf", removes ASCII space characters,
// echoes each page's text to the console, and writes it to "處理結果.txt".
var pdfReader = new PdfReader("xxx.pdf");
try
{
    // The using block flushes and closes the writer (and the FileStream it wraps)
    // even when extraction throws part-way through, so the output handle is not leaked.
    using (var output = new StreamWriter(new FileStream("處理結果.txt", FileMode.Create)))
    {
        int pageCount = pdfReader.NumberOfPages;

        // The loop covers page numbers 1..pageCount inclusive.
        for (int pg = 1; pg <= pageCount; pg++)
        {
            // A fresh strategy is created for every page, as in the original code.
            // NOTE(review): presumably the strategy accumulates text internally, so
            // reusing one instance across pages would repeat earlier pages — confirm
            // before hoisting it out of the loop.
            ITextExtractionStrategy strategy = new SimpleTextExtractionStrategy();
            var value = PdfTextExtractor.GetTextFromPage(pdfReader, pg, strategy);

            // Strip the space characters from the extracted text.
            value = value.Replace(" ", "");

            Console.WriteLine(value);

            // Pages are written back-to-back with no separator between them.
            output.Write(value);
        }
    }
}
finally
{
    // Release the source PDF regardless of whether extraction succeeded.
    pdfReader.Close();
}

Console.Write("處理完畢");

// Keep the console window open until the user presses Enter.
Console.ReadLine();
該方法讀出的漢字不會亂碼。