背景:KindEditor 的內容在數據庫中以 text 類型保存,其中既有文字也有HTML標簽。
需求:顯示內容的前50個字(純文字內容)
方法:將字段查出後,先去除HTML標簽,再截取前50個字。
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Helpers for turning rich-text (HTML) content into a short plain-text preview.
 *
 * <p>The stripping is purely regex based and is not an HTML sanitizer: a '&lt;' that has
 * no closing '&gt;' is not matched by the tag pattern and stays in the output, so the
 * result must still be HTML-escaped before it is rendered in a page.
 */
public class StrUtils {

    /** Preview length, in Unicode code points, used by {@link #getTextFromHtml(String)}. */
    private static final int DEFAULT_PREVIEW_LENGTH = 50;

    // Compiled once: Pattern instances are immutable and safe to share between threads,
    // whereas compiling them on every call is wasted work.

    /** A whole script element including its body (tag name matched case-insensitively). */
    private static final Pattern SCRIPT_BLOCK =
            Pattern.compile("<script[^>]*?>[\\s\\S]*?</script>", Pattern.CASE_INSENSITIVE);

    /** A whole style element including its body (tag name matched case-insensitively). */
    private static final Pattern STYLE_BLOCK =
            Pattern.compile("<style[^>]*?>[\\s\\S]*?</style>", Pattern.CASE_INSENSITIVE);

    /**
     * An img tag with a double-quoted src attribute. Handled before the generic tag
     * pattern because the quoted src value may itself contain a '&gt;' character.
     */
    private static final Pattern IMG_TAG =
            Pattern.compile("<img\\s*([^>]*)\\s*src=\"(.*?)\"\\s*([^>]*)>", Pattern.CASE_INSENSITIVE);

    /**
     * Emoji and symbol ranges. Each escaped surrogate pair is read by the regex engine as
     * one code point, so the ranges are U+1F000-U+1F3FF, U+1F400-U+1F7FF,
     * U+1F900-U+1FBFF and U+2600-U+27FF.
     */
    private static final Pattern EMOJI = Pattern.compile(
            "[\\ud83c\\udc00-\\ud83c\\udfff]|[\\ud83d\\udc00-\\ud83d\\udfff]"
                    + "|[\\ud83e\\udd00-\\ud83e\\udfff]|[\\u2600-\\u27ff]");

    /** Any remaining tag: '&lt;', one or more characters other than '&gt;', then '&gt;'. */
    private static final Pattern ANY_TAG = Pattern.compile("<[^>]+>");

    /** Runs of whitespace as defined by the regex class \s (space, tab, CR, LF, VT, FF). */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Named character references such as the non-breaking-space or ampersand entity. */
    private static final Pattern NAMED_ENTITY = Pattern.compile("&[a-zA-Z]{1,10};");

    /** The patterns in the order they are applied; each match is replaced by nothing. */
    private static final Pattern[] STRIP_ORDER = {
        SCRIPT_BLOCK, STYLE_BLOCK, IMG_TAG, EMOJI, ANY_TAG, WHITESPACE, NAMED_ENTITY
    };

    /**
     * Removes script/style elements, tags, emoji, whitespace and named entities from an
     * HTML fragment.
     *
     * <p>Entities are deleted, not decoded, and all whitespace is removed, so words that
     * were separated by spaces end up joined.
     *
     * @param htmlStr the HTML fragment; may be {@code null}
     * @return the remaining text, trimmed; an empty string when {@code htmlStr} is
     *         {@code null} or empty
     */
    public static String delHTMLTag(String htmlStr) {
        if (htmlStr == null || htmlStr.isEmpty()) {
            return "";
        }
        String text = htmlStr;
        for (Pattern pattern : STRIP_ORDER) {
            text = pattern.matcher(text).replaceAll("");
        }
        // trim() still matters: it drops leading/trailing control characters that the
        // \s class does not cover.
        return text.trim();
    }

    /**
     * Returns the first 50 characters of the plain text contained in an HTML fragment.
     *
     * @param htmlStr the HTML fragment; may be {@code null}
     * @return at most 50 code points of plain text, never {@code null}
     */
    public static String getTextFromHtml(String htmlStr) {
        return getTextFromHtml(htmlStr, DEFAULT_PREVIEW_LENGTH);
    }

    /**
     * Returns the first {@code maxLength} characters of the plain text contained in an
     * HTML fragment.
     *
     * @param htmlStr   the HTML fragment; may be {@code null}
     * @param maxLength maximum number of Unicode code points to keep; must not be negative
     * @return at most {@code maxLength} code points of plain text, never {@code null}
     * @throws IllegalArgumentException if {@code maxLength} is negative
     */
    public static String getTextFromHtml(String htmlStr, int maxLength) {
        if (maxLength < 0) {
            throw new IllegalArgumentException("maxLength must not be negative: " + maxLength);
        }
        // delHTMLTag leaves no ASCII whitespace behind, but the \s class does not match a
        // raw non-breaking space (U+00A0), so remove those explicitly.
        String text = delHTMLTag(htmlStr).replace("\u00A0", "");
        return truncateToCodePoints(text, maxLength);
    }

    /** Cuts text to at most maxCodePoints code points without splitting a surrogate pair. */
    private static String truncateToCodePoints(String text, int maxCodePoints) {
        // The char count is an upper bound for the code point count, so short strings
        // can be returned without scanning them.
        if (text.length() <= maxCodePoints
                || text.codePointCount(0, text.length()) <= maxCodePoints) {
            return text;
        }
        return text.substring(0, text.offsetByCodePoints(0, maxCodePoints));
    }

}