【Java】驗證碼識別解決方案


對於類似以下簡單的驗證碼的識別方案:

1、

2

3

4、

 

1、建庫:切割驗證碼為單個字符,人工標記,比如:A。

 

2、識別:給定一個驗證碼,先切割為單個字符,再逐一在庫中查詢比對,得到識別結果。

/**
 * Template-matching recogniser for simple captchas.
 *
 * <p>Pipeline: download the captcha image to a local cache file, binarise it
 * ({@link #removeBackgroud(String)}), cut it into single glyphs
 * ({@link #splitImage(BufferedImage)}), and match every glyph against a
 * directory of hand-labelled template images ({@link #loadTrainData()},
 * {@link #getSingleCharOcr(BufferedImage, Map)}).
 *
 * <p>Both the template directory and the cache file location are hard-coded
 * below. The original author's note gives the cache location as
 * {@code Configuration.getProperties("web_save_path") + "/captcha.jpg"}.
 *
 * <p>NOTE(review): every caller shares the single cache file and the lazily
 * initialised template map, so concurrent calls to {@link #GetCaptcha} can
 * overwrite each other's image - confirm how callers serialise access.
 *
 * author: chzeze
 */
public class AmGetCaptchaTest {
    private static final Logger logger = Logger.getLogger(AmGetCaptchaTest.class);

    /** Directory holding the labelled single-character template images. */
    private static final String TRAIN_PATH = "/data/sata/share_sata/AmazonCrawl/amazonWeb/captcha";
    /** File the downloaded captcha is written to before recognition. */
    private static final String CAPTCHA_CACHE_PATH = "/data/sata/share_sata/AmazonCrawl/amazonWeb/captcha.jpg";

    /** Returned by {@link #GetCaptcha} when nothing could be recognised. */
    private static final String UNRECOGNIZED_CAPTCHA = "######";
    /** Returned by {@link #getSingleCharOcr} when no template matches. */
    private static final String UNRECOGNIZED_CHAR = "#";

    /** A pixel counts as black when R + G + B is at most this value. */
    private static final int BLACK_MAX_RGB_SUM = 100;
    /** A pixel counts as white when R + G + B is strictly above this value. */
    private static final int WHITE_MAX_NON_WHITE_RGB_SUM = 600;
    /** Number of equal-width vertical strips the image is binarised in. */
    private static final int STRIP_COUNT = 5;
    /** Runs of inked columns narrower than this are discarded as noise. */
    private static final int MIN_GLYPH_WIDTH = 3;
    /** Templates whose width differs by more than this are not compared. */
    private static final int MAX_WIDTH_DIFFERENCE = 2;
    /** Pause after a failed download so a struggling host is not hammered. */
    private static final long FAILURE_BACKOFF_MILLIS = 20000L;

    /** Lazily loaded template image -> label cache; see {@link #loadTrainData()}. */
    private static Map<BufferedImage, String> trainMap = null;
    private static MultiThreadedHttpConnectionManager httpConnectionManager = new MultiThreadedHttpConnectionManager();
    private static HttpClient client = new HttpClient(httpConnectionManager);
/*    static {
        // Max connections per host and in total; hosts are told apart via the host configuration
        client.getHttpConnectionManager().getParams().setDefaultMaxConnectionsPerHost(8);
        client.getHttpConnectionManager().getParams().setMaxTotalConnections(48);
        client.getHttpConnectionManager().getParams().setConnectionTimeout(10000);
        client.getHttpConnectionManager().getParams().setSoTimeout(10000);
        client.getHttpConnectionManager().getParams().setTcpNoDelay(true);
        client.getHttpConnectionManager().getParams().setLinger(1000);
        // Retry up to 3 times on failure; no further attempts after a success
        client.getHttpConnectionManager().getParams().setParameter(HttpMethodParams.RETRY_HANDLER, new DefaultHttpMethodRetryHandler());
    }*/

    /**
     * Tells whether a pixel is dark enough to count as ink.
     *
     * @param colorInt packed RGB value as returned by {@code BufferedImage.getRGB}
     * @return 1 if R + G + B is at most 100, otherwise 0
     */
    public static int isBlack(int colorInt) {
        Color color = new Color(colorInt);
        int sum = color.getRed() + color.getGreen() + color.getBlue();
        return sum <= BLACK_MAX_RGB_SUM ? 1 : 0;
    }

    /**
     * Tells whether a pixel is bright enough to count as background.
     *
     * @param colorInt packed RGB value as returned by {@code BufferedImage.getRGB}
     * @return 1 if R + G + B is greater than 600, otherwise 0
     */
    public static int isWhite(int colorInt) {
        Color color = new Color(colorInt);
        int sum = color.getRed() + color.getGreen() + color.getBlue();
        return sum > WHITE_MAX_NON_WHITE_RGB_SUM ? 1 : 0;
    }

    /**
     * Loads an image, trims a one-pixel border and binarises it strip by strip.
     *
     * <p>The trimmed image is divided into five vertical strips. Within each
     * strip the most frequent non-white colour is painted black and every
     * other pixel is painted white.
     *
     * <p>NOTE(review): a strip starts at {@code (int) (1 + i * subWidth)} and
     * stops before {@code width - 1}, so column 0, the last column and (when
     * the width is a multiple of five) the first column of each strip keep
     * their original colours. This is preserved as-is because existing
     * template images were presumably produced with the same bounds.
     *
     * @param picFile path of the image file to read
     * @return the binarised image (a view onto the decoded image's pixels)
     * @throws Exception if the file cannot be read or decoded as an image
     */
    public static BufferedImage removeBackgroud(String picFile)
            throws Exception {
        BufferedImage img = ImageIO.read(new File(picFile));
        if (img == null) {
            // ImageIO.read signals "no decoder for this content" with null.
            throw new IOException("Not a readable image: " + picFile);
        }
        img = img.getSubimage(1, 1, img.getWidth() - 2, img.getHeight() - 2);
        int width = img.getWidth();
        int height = img.getHeight();
        double subWidth = width / (double) STRIP_COUNT;
        int white = Color.WHITE.getRGB();
        int black = Color.BLACK.getRGB();

        for (int i = 0; i < STRIP_COUNT; i++) {
            int startX = (int) (1 + i * subWidth);
            double endX = Math.min((i + 1) * subWidth, width - 1);

            // Histogram of the non-white colours in this strip.
            Map<Integer, Integer> histogram = new HashMap<Integer, Integer>();
            for (int x = startX; x < endX; ++x) {
                for (int y = 0; y < height; ++y) {
                    int rgb = img.getRGB(x, y);
                    if (isWhite(rgb) == 1) {
                        continue;
                    }
                    Integer seen = histogram.get(rgb);
                    histogram.put(rgb, seen == null ? 1 : seen + 1);
                }
            }

            // The dominant colour is taken to be the glyph's ink.
            int max = 0;
            int colorMax = 0;
            for (Map.Entry<Integer, Integer> entry : histogram.entrySet()) {
                if (max < entry.getValue()) {
                    max = entry.getValue();
                    colorMax = entry.getKey();
                }
            }

            for (int x = startX; x < endX; ++x) {
                for (int y = 0; y < height; ++y) {
                    img.setRGB(x, y, img.getRGB(x, y) == colorMax ? black : white);
                }
            }
        }
        return img;
    }

    /**
     * Crops away the rows above the first and below the last row that
     * contains a black pixel.
     *
     * <p>If the image has no black pixel at all, only its first row is returned.
     *
     * @param img binarised glyph image
     * @return a sub-image of {@code img} with the same width
     * @throws Exception declared for compatibility with existing callers
     */
    public static BufferedImage removeBlank(BufferedImage img) throws Exception {
        int width = img.getWidth();
        int height = img.getHeight();

        int start = 0;
        for (int y = 0; y < height; ++y) {
            if (rowHasBlackPixel(img, y, width)) {
                start = y;
                break;
            }
        }

        int end = 0;
        for (int y = height - 1; y >= 0; --y) {
            if (rowHasBlackPixel(img, y, width)) {
                end = y;
                break;
            }
        }
        return img.getSubimage(0, start, width, end - start + 1);
    }

    /** Returns true if row {@code y} of {@code img} contains at least one black pixel. */
    private static boolean rowHasBlackPixel(BufferedImage img, int y, int width) {
        for (int x = 0; x < width; ++x) {
            if (isBlack(img.getRGB(x, y)) == 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Cuts a binarised captcha into glyphs along columns without black pixels.
     *
     * <p>Every maximal run of adjacent columns containing at least one black
     * pixel becomes one glyph; runs of one or two columns are dropped. Each
     * glyph is vertically cropped with {@link #removeBlank(BufferedImage)}.
     *
     * @param img binarised captcha image
     * @return the glyph images in left-to-right order, possibly empty
     * @throws Exception propagated from {@link #removeBlank(BufferedImage)}
     */
    public static List<BufferedImage> splitImage(BufferedImage img)
            throws Exception {
        List<BufferedImage> subImgs = new ArrayList<BufferedImage>();
        int width = img.getWidth();
        int height = img.getHeight();

        // Number of black pixels in every column.
        int[] columnWeight = new int[width];
        for (int x = 0; x < width; ++x) {
            for (int y = 0; y < height; ++y) {
                columnWeight[x] += isBlack(img.getRGB(x, y));
            }
        }

        int x = 0;
        while (x < width) {
            if (columnWeight[x] == 0) {
                x++;
                continue;
            }
            int runStart = x;
            while (x < width && columnWeight[x] > 0) {
                x++;
            }
            int runLength = x - runStart;
            if (runLength >= MIN_GLYPH_WIDTH) {
                subImgs.add(removeBlank(img.getSubimage(runStart, 0, runLength, height)));
            }
        }
        return subImgs;
    }

    /**
     * Returns the template image -> label map, loading it on first use.
     *
     * <p>Every regular file in the template directory that decodes as an image
     * is used; its label is the first character of its file name. Entries
     * that are not regular files or cannot be decoded are skipped.
     *
     * @return the cached template map
     * @throws Exception if the template directory cannot be listed or a file
     *         cannot be read
     */
    public static Map<BufferedImage, String> loadTrainData() throws Exception {
        if (trainMap == null) {
            File[] files = new File(TRAIN_PATH).listFiles();
            if (files == null) {
                // listFiles() returns null for a missing or unreadable directory.
                throw new IOException("Training directory is missing or unreadable: " + TRAIN_PATH);
            }
            Map<BufferedImage, String> map = new HashMap<BufferedImage, String>();
            for (File file : files) {
                if (!file.isFile()) {
                    continue;
                }
                BufferedImage template = ImageIO.read(file);
                if (template == null) {
                    // A null key would crash the matcher later on.
                    logger.warn("Skipping non-image file in training directory: " + file);
                    continue;
                }
                map.put(template, String.valueOf(file.getName().charAt(0)));
            }
            trainMap = map;
        }
        return trainMap;
    }

    /**
     * Finds the template that differs from a glyph in the fewest pixels.
     *
     * <p>Templates whose width differs from the glyph's by more than two
     * pixels are ignored. The comparison covers only the area both images
     * share, anchored at the top-left corner; height differences are not
     * penalised.
     *
     * @param img binarised glyph to recognise
     * @param map template image -> label map
     * @return the label of the best match, or "#" if no template beats the
     *         initial bound of {@code width * height} mismatches
     */
    public static String getSingleCharOcr(BufferedImage img,
            Map<BufferedImage, String> map) {
        String result = UNRECOGNIZED_CHAR;
        int width = img.getWidth();
        int height = img.getHeight();
        int min = width * height;
        for (Map.Entry<BufferedImage, String> entry : map.entrySet()) {
            BufferedImage bi = entry.getKey();
            if (bi == null) {
                continue;
            }
            if (Math.abs(bi.getWidth() - width) > MAX_WIDTH_DIFFERENCE) {
                continue;
            }
            int widthmin = Math.min(width, bi.getWidth());
            int heightmin = Math.min(height, bi.getHeight());
            int count = 0;
            scan: for (int x = 0; x < widthmin; ++x) {
                for (int y = 0; y < heightmin; ++y) {
                    if (isBlack(img.getRGB(x, y)) != isBlack(bi.getRGB(x, y))) {
                        count++;
                        if (count >= min) {
                            // Already no better than the current best match.
                            break scan;
                        }
                    }
                }
            }
            if (count < min) {
                min = count;
                result = entry.getValue();
            }
        }
        return result;
    }

    /**
     * Recognises every glyph of the captcha stored in a file.
     *
     * @param file path of the captcha image
     * @return one character per detected glyph, "#" for unmatched glyphs
     * @throws Exception if the image or the template data cannot be loaded
     */
    public static String getAllOcr(String file) throws Exception {
        BufferedImage img = removeBackgroud(file); // strip the ghosting/background
        List<BufferedImage> listImg = splitImage(img); // cut into glyphs
        Map<BufferedImage, String> map = loadTrainData();
        StringBuilder result = new StringBuilder();
        for (BufferedImage bi : listImg) {
            result.append(getSingleCharOcr(bi, map));
        }
        return result.toString();
    }

    /**
     * Downloads the captcha image into the local cache file for the recogniser.
     *
     * <p>Failures are logged, not thrown. The cache file is only written for
     * an HTTP 200 response.
     *
     * @param imgurl URL of the captcha image
     */
    public static void downloadimg(String imgurl)
    {
        fetchCaptcha(imgurl);
    }

    /**
     * Downloads {@code imgurl} into the cache file.
     *
     * @return true only if the response was HTTP 200 and its body was written completely
     */
    private static boolean fetchCaptcha(String imgurl) {
        GetMethod getMethod = new GetMethod(imgurl);
        try {
            int statusCode = client.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                // Do not overwrite the cache file with an error page.
                logger.warn("("+statusCode+")Method failed: "+ getMethod.getStatusLine());
                return false;
            }
            InputStream inputStream = getMethod.getResponseBodyAsStream();
            if (inputStream == null) {
                logger.warn("Captcha response has no body: " + imgurl);
                return false;
            }
            try {
                OutputStream outStream = new FileOutputStream(CAPTCHA_CACHE_PATH);
                try {
                    IOUtils.copy(inputStream, outStream);
                } finally {
                    outStream.close();
                }
            } finally {
                inputStream.close();
            }
            return true;
        } catch (IOException e) {
            logger.error("Failed to download captcha image " + imgurl, e);
            try {
                // Back off before the caller gets a chance to try again.
                Thread.sleep(FAILURE_BACKOFF_MILLIS);
            } catch (InterruptedException e1) {
                // Keep the interrupt visible to whoever owns this thread.
                Thread.currentThread().interrupt();
                logger.error(e1);
            }
            return false;
        } finally {
            getMethod.releaseConnection();
        }
    }

    /**
     * Extracts the captcha image from a page, downloads it and recognises it.
     *
     * <p>The image URL is the {@code src} attribute of the first child of the
     * first {@code div} whose class attribute is exactly
     * {@code "a-row a-text-center"}.
     *
     * @param html source of the page showing the captcha
     * @return the recognised text, or "######" if the image could not be
     *         located, downloaded or recognised
     */
    public static String GetCaptcha(StringBuilder html){
        if (html == null) {
            return UNRECOGNIZED_CAPTCHA;
        }
        Document doc = Jsoup.parse(html.toString());
        String imgurl;
        try {
            imgurl = doc.select("div[class=a-row a-text-center]").get(0).child(0).attr("src");
        } catch (IndexOutOfBoundsException e) {
            // No matching div, or the div has no child element.
            logger.warn("Captcha image element not found in page");
            return UNRECOGNIZED_CAPTCHA;
        }
        if (imgurl.length() == 0) {
            logger.warn("Captcha image element has no src attribute");
            return UNRECOGNIZED_CAPTCHA;
        }
        if (!fetchCaptcha(imgurl)) {
            // Never recognise a stale image left over from an earlier call.
            return UNRECOGNIZED_CAPTCHA;
        }
        try {
            return getAllOcr(CAPTCHA_CACHE_PATH);
        } catch (Exception e) {
            logger.error("Captcha recognition failed", e);
            return UNRECOGNIZED_CAPTCHA;
        }
    }
}

後記:複雜驗證碼識別

對於複雜的驗證碼,目前最簡單的方案是交給第三方人工打碼平台,可以參考我做的 EBay 多線程打碼兔驗證碼解決方案:

http://www.cnblogs.com/zeze/p/6402963.html

更專業的做法是採用機器學習、模式識別等方法來實現,但就我目前的測試結果來看,識別成功率並不理想:複雜驗證碼的正確率只有百分之二三十左右。不過我的訓練樣本庫不大,擴充訓練樣本後結果可能會好一些。

 


免責聲明!

本站轉載的文章為個人學習借鑒使用,本站對版權不負任何法律責任。如果侵犯了您的隱私權益,請聯系本站郵箱yoyou2525@163.com刪除。



 
粵ICP備18138465號   © 2018-2025 CODEPRJ.COM