前面的文章介紹了如何通過selenium+Tesseract-OCR來識別圖片驗證碼。如果只是通過接口來訪問,再用selenium就顯得笨重了,下面介紹如何分別通過HttpURLConnection和HttpClient,用流的方式獲取圖片驗證碼內容。
1.通過HttpURLConnection
package com.imgyzm; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.InputStream; import java.net.HttpURLConnection; import java.net.URL; import org.openqa.selenium.io.FileHandler; /** * @author QiaoJiafei * @version 創建時間:2015年11月9日 上午11:31:14 * 類說明 */ public class GetYZMByURL { public static void main(String[] args) throws Exception { getYzm(); } public static String getYzm() { //new一個URL對象 URL url; String s=""; try { url = new URL("http://172.16.30.226:8099/bms/checkcode.do?0.9858807739801705"); HttpURLConnection conn = (HttpURLConnection)url.openConnection(); //設置請求方式為"GET" conn.setRequestMethod("GET"); //超時響應時間為5秒 conn.setConnectTimeout(5 * 1000); //通過輸入流獲取圖片數據 InputStream inStream = conn.getInputStream(); //得到圖片的二進制數據,以二進制封裝得到數據,具有通用性 byte[] data = readInputStream(inStream); //new一個文件對象用來保存圖片,默認保存當前工程根目錄 File imageFile = new File("D:/BeautyGirl.jpg"); //創建輸出流 FileOutputStream outStream = new FileOutputStream(imageFile); //寫入數據 outStream.write(data); //關閉輸出流 outStream.close(); Runtime rt = Runtime.getRuntime(); rt.exec("cmd.exe /C tesseract.exe D:\\BeautyGirl.jpg D:\\ddd\\yzm -1 "); Thread.sleep(1000); File file = new File("D:\\ddd\\yzm.txt"); if(file.exists()) { FileHandler fh = new FileHandler(); s = fh.readAsString(file).trim(); System.out.println("========="+s); } else { System.out.print("yzm.txt不存在"); } } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); } return s; //打開鏈接 } public static byte[] readInputStream(InputStream inStream) throws Exception{ ByteArrayOutputStream outStream = new ByteArrayOutputStream(); //創建一個Buffer字符串 byte[] buffer = new byte[1024]; //每次讀取的字符串長度,如果為-1,代表全部讀取完畢 int len = 0; //使用一個輸入流從buffer里把數據讀取出來 while( (len=inStream.read(buffer)) != -1 ){ //用輸出流往buffer里寫入數據,中間參數代表從哪個位置開始讀,len代表讀取的長度 outStream.write(buffer, 0, len); } //關閉輸入流 inStream.close(); //把outStream里的數據寫入內存 return outStream.toByteArray(); } }
2.通過HttpClient
package com.imgyzm; import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; import org.apache.http.HttpEntity; import org.apache.http.HttpResponse; import org.apache.http.client.ClientProtocolException; import org.apache.http.client.HttpClient; import org.apache.http.client.methods.HttpGet; import org.apache.http.client.methods.HttpPost; import org.apache.http.impl.client.DefaultHttpClient; import org.apache.http.impl.conn.PoolingClientConnectionManager; import org.apache.http.util.EntityUtils; import org.openqa.selenium.io.FileHandler; /** * @author QiaoJiafei * @version 創建時間:2015年11月9日 上午10:53:11 * 類說明 */ public class GetYZMByHttpClient { public static void main(String args[]) throws Exception { String s=""; HttpClient httpclient = new DefaultHttpClient(new PoolingClientConnectionManager()); String imgurl = "http://172.16.30.226:8099/bms/checkcode.do?0.9858807739801705"; HttpGet ht = new HttpGet(imgurl); HttpResponse response = null; response = httpclient.execute(ht); HttpEntity entity = response.getEntity(); InputStream inStream = entity.getContent(); byte[] data = readInputStream(inStream); //new一個文件對象用來保存圖片,默認保存當前工程根目錄 File imageFile = new File("D:/yzm.jpg"); //創建輸出流 FileOutputStream outStream = new FileOutputStream(imageFile); //寫入數據 outStream.write(data); //關閉輸出流 outStream.close(); Runtime rt = Runtime.getRuntime(); rt.exec("cmd.exe /C tesseract.exe D:\\yzm.jpg D:\\ddd\\yzm -1 "); Thread.sleep(1000); File file = new File("D:\\ddd\\yzm.txt"); if(file.exists()) { FileHandler fh = new FileHandler(); s = fh.readAsString(file).trim(); System.out.println("========="+s); } else { System.out.print("yzm.txt不存在"); } /*===========下面是登錄接口==========*/ String url = "http://172.16.30.226:8099/bms/staff/login.do?account=admin123&checkcode="+s+"&pwd=aaaaaa1"; System.out.println("url=========="+url); HttpPost httppost = new HttpPost(url); response = httpclient.execute(httppost); entity 
= response.getEntity(); s = EntityUtils.toString(entity, "UTF-8"); System.out.println(s); //打開鏈接 } public static byte[] readInputStream(InputStream inStream) throws Exception{ ByteArrayOutputStream outStream = new ByteArrayOutputStream(); //創建一個Buffer字符串 byte[] buffer = new byte[1024]; //每次讀取的字符串長度,如果為-1,代表全部讀取完畢 int len = 0; //使用一個輸入流從buffer里把數據讀取出來 while( (len=inStream.read(buffer)) != -1 ){ //用輸出流往buffer里寫入數據,中間參數代表從哪個位置開始讀,len代表讀取的長度 outStream.write(buffer, 0, len); } //關閉輸入流 inStream.close(); //把outStream里的數據寫入內存 return outStream.toByteArray(); } }
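那麼這兩種方式有什麼區別呢?通過測試發現,使用HttpURLConnection獲取驗證碼,再被其它接口調用的時候,該驗證碼已經失效了;而使用HttpClient時,只要保證調用接口和獲取驗證碼用的是同一個HttpClient實例,獲取到的驗證碼再被其它接口調用時仍然有效。原因應該是驗證碼與服務端的會話(Cookie)綁定:同一個HttpClient實例會在多次請求之間自動保存並攜帶Cookie,而HttpURLConnection默認不會保存Cookie,每次請求都相當於一個新的會話。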
保存圖片也可以用下面的方法,更簡便一些
// Alternative way to save the captcha: decode the response body with ImageIO
// and write it back out as a JPEG file.
response = client.execute(httpget);
entity = response.getEntity();
// try-with-resources closes the response stream whether or not decoding succeeds.
try (InputStream in = entity.getContent()) {
    BufferedImage input = ImageIO.read(in);
    // ImageIO.read returns null when no registered reader can decode the data;
    // passing null on to ImageIO.write would fail with IllegalArgumentException.
    if (input == null) {
        throw new IOException("response body is not a decodable image");
    }
    // ImageIO.write returns false when no suitable writer handles the image.
    if (!ImageIO.write(input, "jpg", new File("D:/11.jpg"))) {
        throw new IOException("no ImageIO writer could encode the image as jpg");
    }
}
HttpClient API:http://hc.apache.org/httpcomponents-client-ga/httpclient/apidocs/