public class pdfAnalysis { /** * @throws IOException * @param從網絡上下載PDF,截取PDF字符串, */ public static void main(String[] args) throws IOException { // 下載的連接 下載下來的名字 下載下來的路徑 // pdfAnalysis.downLoadByUrl("", "KK.pdf", "F:/"); // 讀取文件 pdfAnalysis pdf = new pdfAnalysis(); // 讀取文件 String pdfName = "F:\\CC.pdf"; // 解析PDF里的值 存入變量pdf_Body String pdf_Body = pdf.readFileOfPDF(pdfName); //System.out.println(pdf_Body); /* String str = pdf_Body.substring(pdf_Body.indexOf("Arrival"), pdf_Body.indexOf("Payment Details")); String str1 = str.substring(str.indexOf("H (")); String [] pp ={"Monday","Tuesday","Wednesday","Thursday","Friday","Saturday" ,"Sunday" }; for(String sto:pp){ if(str1.contains(sto)){ String result = str1.substring(str1.indexOf(sto)); //System.out.println(result); //System.out.println(result.length()); String result2 = result.substring(0,result.indexOf(",")); String result3 = result2.trim(); System.out.println("我要的時間:"+result3+"我是"+pdfName+"文件"); } }*/ /*if(str1.contains("Monday")||str1.contains("Tuesday")|| str1.contains("Wednesday")||str1.contains("Thursday")|| str1.contains("Friday")||str1.contains("Saturday")||str1.contains("Sunday")){ }*/ // System.out.println(str1); // 取出人名值 String name_Temp = pdf_Body.substring(pdf_Body.indexOf("Arrive"), pdf_Body.indexOf("passenger details")); // System.out.println(str); String name_Temp1 = null; String result_name = null; List<String> list_Name = new ArrayList<>(); for (int i = 1; i < name_Temp.length(); i++) { if (name_Temp.contains(i + ".")) { name_Temp1 = name_Temp.substring(name_Temp.indexOf(i + ".")); result_name = name_Temp1.substring(name_Temp1.indexOf(i + ".") + 3, name_Temp1.indexOf("Seat Number Services")); list_Name.add(result_name); } // System.out.println(add); // System.out.println(str2); if (name_Temp1.equals("null")) { continue; } } for (String i : list_Name) { System.out.println("所有的人名:" + i); }*/ if (pdfAnalysis.infile != null) { pdfAnalysis.infile.close(); System.out.println("我要准備關閉PDF文檔了"); } } public static int appearNumber(String srcText, String temp) { int count = 0; Pattern p = Pattern.compile(temp); Matcher m = p.matcher(srcText); while (m.find()) { count++; } return count; } public static FileInputStream infile = null; public String readFileOfPDF(String pdfName) throws IOException { String context = null; File file = new File(pdfName);// 創建一個文件對象 try { infile = new FileInputStream(pdfName);// 創建一個文件輸入流 // 新建一個PDF解析器對象 PDFParser parser = new PDFParser(infile); // 對PDF文件進行解析 parser.parse(); // 獲取解析后得到的PDF文檔對象 PDDocument pdfdocument = parser.getPDDocument(); // 新建一個PDF文本剝離器 PDFTextStripper stripper = new PDFTextStripper(); // 從PDF文檔對象中剝離文本 context = stripper.getText(pdfdocument); System.out.println("PDF文件" + file.getAbsolutePath() + "的文本內容如下:"); // System.out.println(context); } catch (Exception e) { System.out.println("讀取PDF文件" + file.getAbsolutePath() + "失敗!" + e.getMessage()); } finally { if (infile != null) { try { infile.close(); } catch (IOException e1) { } } } return context; }