本文介紹一個簡單的 Java 工具類,用於將驗證碼以點陣形式打印出來並自動識別。為了提升識別精度和程序性能,此工具類只針對特定類型的驗證碼;若要用於其他類型的驗證碼識別,需要做相應調整。
文章分兩部分,分別演示此 Java 工具類如何識別靜態驗證碼圖片和動態驗證碼 GIF。
一、靜態驗證碼圖片識別
輸入驗證碼:
程序運行結果:
======= print and recognize captchapic =======
"................................................................................",
"................................................................................",
"................................................................................",
"................##.##........#####..............................................",
"................##.##.......##...##.............................................",
"................##.##.............##............................................",
"............###.##.##.###........##....#####....................................",
"...........##..###.###..##.....###....##...##...................................",
"..........##....##.##....##......##........##...................................",
"..........##....##.##....##.......##..#######...................................",
"..........##....##.##....##.......##.##....##...................................",
"...........##..###.##....##.##...##..##...###...................................",
"............###.##.##....##..#####....####.##...................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................",
"................................................................................"
recognize: dh3a
相應代碼如下:
package com.demo.check;
import org.apache.commons.httpclient.HttpClient;
import org.apache.commons.httpclient.HttpStatus;
import org.apache.commons.httpclient.methods.GetMethod;
import org.apache.commons.io.IOUtils;
import javax.imageio.ImageIO;
import javax.imageio.ImageReader;
import javax.imageio.stream.FileImageInputStream;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
public class CaptchaRecognizer {
/**
 * Downloads the sample captcha, prints it as a dot matrix and prints the recognized text.
 *
 * <p>The download is attempted at most 5 times; the loop stops at the first attempt that
 * yields a decodable image, so the captcha is printed only once.
 *
 * @param args unused
 */
public static void main(String[] args) {
    HttpClient httpClient = new HttpClient();
    GetMethod getMethod = new GetMethod("https://img2020.cnblogs.com/blog/1039974/202011/1039974-20201119224011928-1654538410.png"); // captcha URL
    for (int i = 0; i < 5; i++) {
        try {
            // Execute the GET request
            int statusCode = httpClient.executeMethod(getMethod);
            if (statusCode != HttpStatus.SC_OK) {
                System.err.println("Method failed: " + getMethod.getStatusLine());
                continue;
            }
            // Decode straight from the response stream: no temporary file to leak or clean up.
            // The stream itself is released together with the connection in the finally block.
            BufferedImage image = ImageIO.read(getMethod.getResponseBodyAsStream());
            if (image == null) {
                // ImageIO.read returns null when no registered reader understands the data
                System.err.println("Response body is not a readable image");
                continue;
            }
            System.out.println("======= print and recognize captchapic =======");
            printImage(image);
            System.out.printf("recognize: %s\n", recognizeCaptcha(image));
            // Success: do not download and print the same captcha again
            break;
        } catch (Exception e) {
            e.printStackTrace();
        } finally {
            // Release the connection (also runs on continue/break)
            getMethod.releaseConnection();
        }
    }
}
/**
 * Tells whether a pixel counts as foreground ("black").
 *
 * <p>A pixel is black when the sum of its red, green and blue components is at most 10.
 * The alpha bits of the value are ignored.
 *
 * @param colorInt packed RGB value of the pixel (red in bits 16-23, green in 8-15, blue in 0-7)
 * @return {@code true} if the pixel is (almost) pure black, {@code false} otherwise
 */
private static boolean isBlack(int colorInt) {
    int red = (colorInt >> 16) & 0xFF;
    int green = (colorInt >> 8) & 0xFF;
    int blue = colorInt & 0xFF;
    return red + green + blue <= 10;
}
/**
 * Prints an image to standard output as a dot matrix, one quoted string per pixel row.
 *
 * <p>Foreground pixels (see {@code isBlack}) are written as {@code '#'} and all others as
 * {@code '.'}. Every row except the last is followed by a comma, so the output can be
 * pasted directly into a {@code String[]} initializer.
 *
 * @param image the image to print
 */
private static void printImage(BufferedImage image) {
    final int rows = image.getHeight();
    final int cols = image.getWidth();
    for (int row = 0; row < rows; row++) {
        // Assemble the whole row first, then emit it in a single call
        StringBuilder line = new StringBuilder(cols + 3);
        line.append('"');
        for (int col = 0; col < cols; col++) {
            line.append(isBlack(image.getRGB(col, row)) ? '#' : '.');
        }
        line.append('"');
        if (row != rows - 1) {
            line.append(',');
        }
        System.out.println(line);
    }
}
/**
 * Recognizes a single symbol by nearest-template matching.
 *
 * <p>The sub-image is binarized with {@code isBlack} and compared pixel by pixel against every
 * digit template in {@code digitals} and then every letter template in {@code alphas}. The
 * symbol whose template differs in the fewest pixels wins; on a tie the template checked
 * first is kept, and an exact match (zero differing pixels) returns immediately.
 *
 * <p>Templates without a single foreground pixel are placeholders for symbols that have no
 * glyph data. They are skipped, so a sparse or noisy glyph can never be reported as one of
 * those symbols.
 *
 * @param image sub-image holding one symbol; must not be larger than the templates
 * @return the best matching symbol ({@code '0'}-{@code '9'} or {@code 'a'}-{@code 'z'}), or
 *         the NUL character if there is no usable template at all
 * @throws IllegalArgumentException if the image is taller or wider than a template
 */
private static char recognizeSymbol(BufferedImage image) {
    int h = image.getHeight();
    int w = image.getWidth();
    // Binarize once instead of re-reading every pixel for each template
    boolean[][] black = new boolean[h][w];
    for (int y = 0; y < h; y++) {
        for (int x = 0; x < w; x++) {
            black[y][x] = isBlack(image.getRGB(x, y));
        }
    }

    // Digits are checked before letters; entry i of a set stands for firstSymbol + i
    String[][][] templateSets = {digitals, alphas};
    char[] firstSymbols = {'0', 'a'};

    int minDiff = Integer.MAX_VALUE;
    char symAns = 0;
    for (int s = 0; s < templateSets.length; s++) {
        String[][] templates = templateSets[s];
        for (int i = 0; i < templates.length; i++) {
            if (isBlankTemplate(templates[i])) {
                continue;
            }
            int curDiff = countMismatches(black, templates[i]);
            if (curDiff < minDiff) {
                minDiff = curDiff;
                symAns = (char) (firstSymbols[s] + i);
            }
            if (minDiff == 0) {
                // Exact match: nothing can beat it
                return symAns;
            }
        }
    }
    return symAns;
}

/** Returns {@code true} if the template contains no foreground ({@code '#'}) pixel. */
private static boolean isBlankTemplate(String[] template) {
    for (int y = 0; y < template.length; y++) {
        if (template[y].indexOf('#') >= 0) {
            return false;
        }
    }
    return true;
}

/** Counts the pixels at which the binarized image and the template disagree. */
private static int countMismatches(boolean[][] black, String[] template) {
    if (black.length > template.length) {
        throw new IllegalArgumentException(
                "image height " + black.length + " exceeds template height " + template.length);
    }
    int diff = 0;
    for (int y = 0; y < black.length; y++) {
        String row = template[y];
        if (black[y].length > row.length()) {
            throw new IllegalArgumentException(
                    "image width " + black[y].length + " exceeds template width " + row.length());
        }
        for (int x = 0; x < black[y].length; x++) {
            if ((row.charAt(x) == '#') != black[y][x]) {
                ++diff;
            }
        }
    }
    return diff;
}
/**
 * Cuts a captcha into its four symbol cells.
 *
 * <p>The cells are 8 pixels wide and 12 pixels high, start at y = 3, and their left edges
 * are at x = 10, 19, 28 and 37 (9 pixels apart).
 *
 * @param image the captcha to split
 * @return the four symbol sub-images in left-to-right order
 */
private static List<BufferedImage> splitImage(BufferedImage image) {
    final int cellCount = 4;
    final int firstLeft = 10;
    final int pitch = 9;
    final int top = 3;
    final int cellWidth = 8;
    final int cellHeight = 12;

    List<BufferedImage> cells = new ArrayList<BufferedImage>(cellCount);
    for (int k = 0; k < cellCount; k++) {
        cells.add(image.getSubimage(firstLeft + k * pitch, top, cellWidth, cellHeight));
    }
    return cells;
}
/**
 * Recognizes the text of a captcha image.
 *
 * <p>The image is cut into symbol cells with {@code splitImage}; each cell is recognized
 * with {@code recognizeSymbol} and the results are concatenated in order.
 *
 * @param image the captcha to recognize
 * @return the recognized symbols, one character per cell
 */
public static String recognizeCaptcha(BufferedImage image) {
    List<BufferedImage> cells = splitImage(image);
    StringBuilder text = new StringBuilder(cells.size());
    for (int k = 0; k < cells.size(); k++) {
        // Recognize the cells one after another
        text.append(recognizeSymbol(cells.get(k)));
    }
    return text.toString();
}
/*
 * Reference bitmaps for the digits. Entry i is the template for the character '0' + i
 * (recognizeSymbol maps a matching index back to a symbol this way). Each template is
 * 12 rows of 8 characters: '#' marks a foreground pixel, '.' a background pixel.
 */
private static String[][] digitals = new String[][]{
// '0' - all-background placeholder (presumably this symbol never occurs in the target captcha; TODO confirm)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// '1' - all-background placeholder (presumably this symbol never occurs in the target captcha; TODO confirm)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// '2'
{
"..####..",
".##..##.",
"##....##",
"......##",
".....##.",
"....##..",
"...##...",
"..##....",
".##.....",
"########",
"........",
"........"
},
// '3'
{
".#####..",
"##...##.",
"......##",
".....##.",
"...###..",
".....##.",
"......##",
"......##",
"##...##.",
".#####..",
"........",
"........"
},
// '4'
{
".....##.",
"....###.",
"...####.",
"..##.##.",
".##..##.",
"##...##.",
"########",
".....##.",
".....##.",
".....##.",
"........",
"........"
},
// '5'
{
"#######.",
"##......",
"##......",
"##.###..",
"###..##.",
"......##",
"......##",
"##....##",
".##..##.",
"..####..",
"........",
"........"
},
// '6'
{
"..####..",
".##..##.",
"##....#.",
"##......",
"##.###..",
"###..##.",
"##....##",
"##....##",
".##..##.",
"..####..",
"........",
"........"
},
// '7'
{
"########",
"......##",
"......##",
".....##.",
"....##..",
"...##...",
"..##....",
".##.....",
"##......",
"##......",
"........",
"........"
},
// '8'
{
"..####..",
".##..##.",
"##....##",
".##..##.",
"..####..",
".##..##.",
"##....##",
"##....##",
".##..##.",
"..####..",
"........",
"........"
},
// '9'
{
"..####..",
".##..##.",
"##....##",
"##....##",
".##..###",
"..###.##",
"......##",
".#....##",
".##..##.",
"..####..",
"........",
"........"
}
};
/*
 * Reference bitmaps for the lowercase letters. Entry i is the template for the character
 * 'a' + i (recognizeSymbol maps a matching index back to a symbol this way). Each template
 * is 12 rows of 8 characters: '#' marks a foreground pixel, '.' a background pixel.
 */
private static String[][] alphas = new String[][]{
// 'a'
{
"........",
"........",
"........",
"..#####.",
".##...##",
"......##",
".#######",
"##....##",
"##...###",
".####.##",
"........",
"........"
},
// 'b'
{
"##......",
"##......",
"##......",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"###..##.",
"##.###..",
"........",
"........"
},
// 'c'
{
"........",
"........",
"........",
"..#####.",
".##...##",
"##......",
"##......",
"##......",
".##...##",
"..#####.",
"........",
"........"
},
// 'd'
{
"......##",
"......##",
"......##",
"..###.##",
".##..###",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"........",
"........"
},
// 'e'
{
"........",
"........",
"........",
"..####..",
".##..##.",
"##....##",
"########",
"##......",
".##...##",
"..#####.",
"........",
"........"
},
// 'f'
{
"...####.",
"..##..##",
"..##..##",
"..##....",
"..##....",
"######..",
"..##....",
"..##....",
"..##....",
"..##....",
"........",
"........"
},
// 'g'
{
"........",
"........",
"........",
".#####.#",
"##...###",
"##...##.",
"##...##.",
".#####..",
"##......",
".######.",
"##....##",
".######."
},
// 'h'
{
"##......",
"##......",
"##......",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
"........",
"........"
},
// 'i'
{
"...##...",
"...##...",
"........",
"..###...",
"...##...",
"...##...",
"...##...",
"...##...",
"...##...",
".######.",
"........",
"........"
},
// 'j'
{
".....##.",
".....##.",
"........",
"....###.",
".....##.",
".....##.",
".....##.",
".....##.",
".....##.",
"##...##.",
"##...##.",
".#####.."
},
// 'k'
{
".##.....",
".##.....",
".##.....",
".##..##.",
".##.##..",
".####...",
".####...",
".##.##..",
".##..##.",
".##...##",
"........",
"........"
},
// 'l' - all-background placeholder (presumably this symbol never occurs in the target captcha; TODO confirm)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// 'm'
{
"........",
"........",
"........",
"#.##.##.",
"##.##.##",
"##.##.##",
"##.##.##",
"##.##.##",
"##.##.##",
"##.##.##",
"........",
"........"
},
// 'n'
{
"........",
"........",
"........",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
"........",
"........"
},
// 'o' - all-background placeholder (presumably this symbol never occurs in the target captcha; TODO confirm)
{
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........",
"........"
},
// 'p'
{
"........",
"........",
"........",
"##.###..",
"###..##.",
"##....##",
"##....##",
"##....##",
"###..##.",
"##.###..",
"##......",
"##......"
},
// 'q'
{
"........",
"........",
"........",
"..###.##",
".##..###",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"......##",
"......##"
},
// 'r'
{
"........",
"........",
"........",
"##.####.",
".###..##",
".##.....",
".##.....",
".##.....",
".##.....",
".##.....",
"........",
"........"
},
// 's'
{
"........",
"........",
"........",
".######.",
"##....##",
"##......",
".######.",
"......##",
"##....##",
".######.",
"........",
"........"
},
// 't'
{
"........",
"..##....",
"..##....",
"######..",
"..##....",
"..##....",
"..##....",
"..##....",
"..##..##",
"...####.",
"........",
"........"
},
// 'u'
{
"........",
"........",
"........",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"........",
"........"
},
// 'v'
{
"........",
"........",
"........",
"##....##",
"##....##",
".##..##.",
".##..##.",
"..####..",
"..####..",
"...##...",
"........",
"........"
},
// 'w'
{
"........",
"........",
"........",
"##....##",
"##....##",
"##.##.##",
"##.##.##",
"##.##.##",
"########",
".##..##.",
"........",
"........"
},
// 'x'
{
"........",
"........",
"........",
"##....##",
".##..##.",
"..####..",
"...##...",
"..####..",
".##..##.",
"##....##",
"........",
"........"
},
// 'y'
{
"........",
"........",
"........",
"##....##",
"##....##",
"##....##",
"##....##",
"##....##",
".##..###",
"..###.##",
"#.....##",
".######."
},
// 'z'
{
"........",
"........",
"........",
".######.",
".....##.",
"....##..",
"...##...",
"..##....",
".##.....",
".######.",
"........",
"........"
}
};
}
二、動態驗證碼gif識別
動態驗證碼 GIF 的識別和靜態驗證碼圖片的識別非常相似,兩者之間唯一的區別在於 GIF 是由多幀靜態圖片所構成的。所以我們處理 GIF 的思路很簡單:從特定幀的靜態圖片中,識別需要的驗證符號。
輸入驗證碼:
程序運行結果:
======= print and recognize captchapic =======
"..................................................................................................................................................",
"..................................................................................................................................................",
"....................#####.........................................................................................................................",
".................###.....###......................................................................................................................",
".................#.........#......................................................................................................................",
".................#..........#.....................................................................................................................",
"................#...........#.....................................................................................................................",
"................#.#######....#..........................######....................................................................................",
"................###########.#.........................##########..................................................................................",
"................#############........................###########..................................................................................",
"................##.....#####.........................##.....#####.................................................................................",
".................##.....####................................#####.................................................................................",
"...................###.####..................................####........................#######..................................................",
"......................#####.................................#######....................##########.................................................",
".....................#####..................................####....#..................###########................................................",
".................########..................................####......#.................#.....#####................................................",
".................#######..................................####.......#........................####................................................",
".................#########...............................####.........#.......................####................................................",
".....................######.............................###.#........#...................########.................................................",
"....................#######...........................####...#.......#.................##########.................................................",
"..................##...######........................####.....##..###.................####...####.................................................",
".................#.....####..#......................####.........#...................####....####.................................................",
".................#....#####..#.....................####..............................####....####.................................................",
"..............#...##.#####.##.....................####...............................####...#####.................................................",
".............##############......................#############.......................#############................................................",
".............###########.........................#############.......................######..#####................................................",
"...............#######...........................#############........................####...#####................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
".................................................................................................................................................."
<<< frame >>>
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
"..................................................................................................................................................",
".................................................................................................................................#####............",
"...............................................................................................................................########...........",
"........................................................######................................................................########............",
"......................................................##########.........................................##...................####...#............",
".....................................................###########........................................#..#.................####.................",
".....................................................##.....#####.......................................#..#.................####.................",
"............................................................#####......................................#....#................####.................",
".............................................................####........................#######.......#....#..............##########.............",
"............................................................####.......................##########......#....#.............##########..............",
"............................................................####.......................###########.....#....#.............##########..............",
"...........................................................####........................#.....#####.....#....#...............####..................",
"..........................................................####................................####......#..#................####..................",
".........................................................####.................................####......#..#................####..................",
"........................................................###..............................########........##................####...................",
"......................................................####.............................##########..........................####...................",
".....................................................####.............................####...####..........................####...................",
"....................................................####......................####...####....####..........................####...................",
"...................................................####......................#....#..####....####.......####...............####...................",
"..................................................####.......................#....#..####...#####.....##....###............####...................",
".................................................#############...............#.....#.#############...#.........#..........####....................",
".................................................#############...............#....#..######..#####...#.........#..........####....................",
".................................................#############...............#....#...####...#####..#...........#.........####....................",
"..............................................................................##.#..................#...........#.................................",
"................................................................................#...................#............#................................",
"....................................................................................................#...........#.................................",
".....................................................................................................#..........#.................................",
".....................................................................................................#.........#..................................",
".....................................................................................................##.......##.................................."
recognize: 32af
以下貼出 GIF 的分割函數,其它過程和第一部分基本相同,之後逐個解析靜態子圖片即可。
/**
 * Splits an animated captcha GIF into one sub-image per symbol.
 *
 * <p>The symbol in slot {@code i} (counted from the left) is cut from frame
 * {@code num - i - 1}, i.e. the frames are consumed in reverse order. Each cut is a
 * 30x27 region whose left edge is at {@code 7 + i * 36} and whose top edge is at y = 5.
 * The frames used for slots 1 and 2 are also printed as dot matrices, each followed by a
 * {@code <<< frame >>>} marker line.
 *
 * <p>Frames are decoded in memory through the public {@code javax.imageio} API, so no
 * temporary files are created, and the reader and input stream are released even when
 * decoding fails.
 *
 * @param file the GIF file to split
 * @return the symbol sub-images in left-to-right order, one per frame
 * @throws IOException if the file cannot be opened or decoded as a GIF
 */
private static List<BufferedImage> splitGif(File file) throws IOException {
    Iterator<ImageReader> readers = ImageIO.getImageReadersByFormatName("gif");
    if (!readers.hasNext()) {
        throw new IOException("No GIF image reader available");
    }
    ImageReader gifReader = readers.next();
    try {
        FileImageInputStream in = new FileImageInputStream(file);
        try {
            gifReader.setInput(in);
            // true = scan the whole file if necessary to count the frames
            int num = gifReader.getNumImages(true);
            List<BufferedImage> subImgs = new ArrayList<BufferedImage>(num);
            for (int i = 0; i < num; i++) {
                // Slot i is taken from frame num - i - 1
                BufferedImage image = gifReader.read(num - i - 1);
                if (i == 1 || i == 2) {
                    printImage(image);
                    System.out.println("<<< frame >>>");
                }
                subImgs.add(image.getSubimage(7 + i * 36, 5, 30, 27));
            }
            return subImgs;
        } finally {
            in.close();
        }
    } finally {
        gifReader.dispose();
    }
}
參考鏈接
[1] https://blog.csdn.net/problc/article/details/5794460#commentBox
[2] https://blog.csdn.net/lmj623565791/article/details/23960391/
[3] https://blog.csdn.net/chwshuang/article/details/64923354