Java判斷中文字符

本文轉載自查看原文 2017-12-22 09:02 4185 java/ 編程語言-[Java]

package com.jsoft.test;

import java.util.regex.Pattern;

/**
 * Helpers for detecting Chinese text in a string, implemented three ways:
 * <ol>
 *   <li>a numeric range check over UTF-16 code units ({@code ...ByRange});</li>
 *   <li>a regular expression over the same range ({@code ...ByReg});</li>
 *   <li>a {@link Character.UnicodeBlock} lookup per code point, which also
 *       accepts Chinese punctuation ({@link #hasChinese}, {@link #isChinese(String)}).</li>
 * </ol>
 * All methods return {@code false} for {@code null}. The "is entirely Chinese"
 * methods also return {@code false} for the empty string, so the three
 * approaches agree with each other.
 *
 * @author jim
 * @date 2017-12-22
 */
public class ChineseHelper {

    /** First code point of the CJK Unified Ideographs block (U+4E00). */
    private static final int HAN_FIRST = 0x4E00;

    /** Last code point of the CJK Unified Ideographs block (U+9FFF). */
    private static final int HAN_LAST = 0x9FFF;

    /**
     * One or more characters from the CJK Unified Ideographs block.
     * Compiled once because {@link Pattern} is immutable and thread-safe.
     */
    private static final Pattern HAN_PATTERN = Pattern.compile("[\\u4E00-\\u9FFF]+");

    /**
     * Demo: runs every detection method over a fixed set of sample strings and
     * prints one line per (sample, method) pair.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String[] samples = {
            // s1: ASCII letters and punctuation only
            "Hello,Tom.!@#$%^&*()_+-={}|[];':\"?",
            // s2: Chinese ideographs only, no punctuation
            "你好中國",
            // s3: Chinese ideographs mixed with Chinese punctuation
            "你好，中國。《》：“”‘’；（）【】！￥、",
            // s4: Korean
            "한국어난",
            // s5: Japanese (hiragana)
            "ぎじゅつ",
            // s6-s8: special characters
            "��",
            "╃",
            "╂",
            // s9: Traditional Chinese
            "蒼老師",
        };

        // 1. Numeric range check.
        // Expected "contains": s1 false, s2 true, s3 true, s4-s8 false, s9 true.
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChineseByRange(samples[i]));
        }
        System.out.println("-------分割線-------");
        // Expected "entirely": s2 and s9 true, everything else false
        // (s3 is false because Chinese punctuation is outside the range).
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChineseByRange(samples[i]));
        }
        System.out.println("-------分割線-------");

        // 2. Regular expression over the same range; results match section 1.
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChineseByReg(samples[i]));
        }
        System.out.println("-------分割線-------");
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChineseByReg(samples[i]));
        }
        System.out.println("-------分割線-------");

        // 3. Unicode block lookup.
        // Expected "contains": s1 false, s2 true, s3 true, s4-s8 false, s9 true.
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否包含中文：" + hasChinese(samples[i]));
        }
        System.out.println("-------分割線-------");
        // Expected "entirely": s2, s3 and s9 true (Chinese punctuation is accepted here).
        for (int i = 0; i < samples.length; i++) {
            System.out.println("s" + (i + 1) + "是否全是中文：" + isChinese(samples[i]));
        }
    }

    /**
     * Tells whether the string contains at least one Chinese character,
     * counting Chinese punctuation as Chinese.
     * <p>
     * The string is walked by code point rather than by {@code char} so that
     * supplementary-plane ideographs (CJK Extensions B, C and D), which are
     * stored as surrogate pairs, are recognised.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any code point is classified as Chinese;
     *         {@code false} otherwise, including for {@code null} or empty input
     */
    public static boolean hasChinese(String str) {
        if (str == null) {
            return false;
        }
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (isChinese(codePoint)) {
                return true;
            }
            // Advance by 2 code units for a surrogate pair, 1 otherwise.
            index += Character.charCount(codePoint);
        }
        return false;
    }

    /**
     * Tells whether the string consists only of Chinese characters,
     * counting Chinese punctuation as Chinese.
     * <p>
     * The string is walked by code point, see {@link #hasChinese(String)}.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every code point is
     *         classified as Chinese; {@code false} otherwise, including for
     *         {@code null} or empty input
     */
    public static boolean isChinese(String str) {
        // An empty string has no Chinese content; this also matches isChineseByReg.
        if (str == null || str.isEmpty()) {
            return false;
        }
        int index = 0;
        while (index < str.length()) {
            int codePoint = str.codePointAt(index);
            if (!isChinese(codePoint)) {
                return false;
            }
            index += Character.charCount(codePoint);
        }
        return true;
    }

    /**
     * Classifies a single code point as Chinese or not, based on its Unicode block.
     * Accepted blocks are the CJK ideograph blocks (unified, compatibility and
     * extensions A to D) plus the punctuation blocks used in Chinese text.
     * <p>
     * NOTE(review): the General Punctuation and Halfwidth/Fullwidth Forms blocks
     * are accepted as a whole, so characters in them that are not specific to
     * Chinese are accepted as well.
     *
     * @param codePoint a Unicode code point, as returned by {@link String#codePointAt(int)}
     * @return {@code true} if the code point belongs to one of the accepted blocks
     */
    private static boolean isChinese(int codePoint) {
        // of(int) returns null for code points outside any known block; the
        // reference comparisons below are null-safe.
        Character.UnicodeBlock block = Character.UnicodeBlock.of(codePoint);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Tells whether the string contains at least one Chinese ideograph, using a
     * regular expression over the CJK Unified Ideographs block (U+4E00 to U+9FFF).
     * Chinese punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any character falls in the range;
     *         {@code false} otherwise, including for {@code null} or empty input
     */
    public static boolean hasChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return HAN_PATTERN.matcher(str).find();
    }

    /**
     * Tells whether the string consists only of Chinese ideographs, using a
     * regular expression over the CJK Unified Ideographs block (U+4E00 to U+9FFF).
     * Chinese punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every character falls
     *         in the range; {@code false} otherwise, including for {@code null}
     *         or empty input
     */
    public static boolean isChineseByReg(String str) {
        if (str == null) {
            return false;
        }
        return HAN_PATTERN.matcher(str).matches();
    }

    /**
     * Tells whether the string contains at least one Chinese ideograph, by
     * comparing each UTF-16 code unit against the CJK Unified Ideographs block
     * (U+4E00 to U+9FFF). Chinese punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if any code unit falls in the range;
     *         {@code false} otherwise, including for {@code null} or empty input
     */
    public static boolean hasChineseByRange(String str) {
        if (str == null) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c >= HAN_FIRST && c <= HAN_LAST) {
                return true;
            }
        }
        return false;
    }

    /**
     * Tells whether the string consists only of Chinese ideographs, by comparing
     * each UTF-16 code unit against the CJK Unified Ideographs block
     * (U+4E00 to U+9FFF). Chinese punctuation is not matched.
     *
     * @param str the text to inspect; may be {@code null}
     * @return {@code true} if the string is non-empty and every code unit falls
     *         in the range; {@code false} otherwise, including for {@code null}
     *         or empty input
     */
    public static boolean isChineseByRange(String str) {
        // An empty string has no Chinese content; this also matches isChineseByReg.
        if (str == null || str.isEmpty()) {
            return false;
        }
        for (int i = 0; i < str.length(); i++) {
            char c = str.charAt(i);
            if (c < HAN_FIRST || c > HAN_LAST) {
                return false;
            }
        }
        return true;
    }
}

如果僅僅去判斷是否是中文，不需判斷中文標點的話，推薦使用正則去匹配，可能更高效點。

還有另外一種投機取巧的方法：轉int類型，然後try...catch。

參考：

http://www.jb51.net/article/79101.htm（以上內容轉自此篇文章）

http://blog.csdn.net/h082602/article/details/73251446

http://blog.csdn.net/u011240877/article/details/49907751

http://blog.csdn.net/l1028386804/article/details/43764073

http://blog.csdn.net/qwkxq/article/details/53508736

https://www.cnblogs.com/jinc/archive/2013/02/26/2933766.html

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯繫本站郵箱yoyou2525@163.com刪除。

猜您在找 Java 判斷中文字符 java判斷獲取到的中文字符串是否亂碼 python利用utf-8編碼判斷中文字符中文字符畫 SQL判斷某列中是否包含中文字符、英文字符、純數字 java 獲取中文字符的首字母 JS正則表達式判斷字符串是否包含中文字符 Qt5顯示中文字符 python 中文字符的處理設置ubuntu默認中文字符