方式一:
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.Locale;
import java.util.Set;
import java.util.regex.Pattern;

/**
 * Helpers for extracting and comparing the registrable domain of a URL
 * (for example {@code hexun.com} for {@code http://news.hexun.com/...}).
 *
 * <p>The lookup is driven by a small built-in suffix table, not by the full
 * Public Suffix List, so suffixes missing from the table (for example
 * {@code co.uk}) are only handled by the two-label fallback described on
 * {@link #getDomainName(URL)}.
 */
public class URLUtil {

    /** Suffixes under which a single additional label forms the domain name. */
    private static final Set<String> PUBLIC_SUFFIXES = Collections.unmodifiableSet(
            new HashSet<String>(Arrays.asList(
                    "com", "org", "net", "gov", "edu", "co", "tv", "mobi", "info", "asia", "xxx", "onion",
                    "cn", "com.cn", "edu.cn", "gov.cn", "net.cn", "org.cn",
                    "jp", "kr", "tw",
                    "hk", "com.hk", "org.hk",
                    "se", "com.se", "org.se")));

    /** Dotted-quad shape check; it does not verify that each octet is within 0-255. */
    private static final Pattern IP_PATTERN = Pattern.compile("(\\d{1,3}\\.){3}(\\d{1,3})");

    /**
     * Returns the domain name of the given URL: the longest known suffix plus
     * the one label in front of it.
     *
     * <p>The host is lower-cased and a single trailing root dot is removed
     * before matching. Hosts that look like an IPv4 address are returned
     * unchanged. When no known suffix matches, the last two labels of the host
     * are returned (or the whole host if it has fewer than two labels).
     *
     * @param url the URL whose host is inspected
     * @return the lower-case domain name; an empty string if the URL has no host
     */
    public static String getDomainName(URL url) {
        // Host names are case-insensitive, but the suffix table is lower-case only.
        String host = url.getHost().toLowerCase(Locale.ROOT);
        if (host.endsWith(".")) {
            host = host.substring(0, host.length() - 1);
        }
        if (IP_PATTERN.matcher(host).matches()) {
            return host;
        }

        // Strip leading labels one at a time; the first time the remainder is a
        // known suffix, the current candidate is "label + longest suffix".
        String candidate = host;
        int dot = candidate.indexOf('.');
        while (dot >= 0) {
            String remainder = candidate.substring(dot + 1);
            if (PUBLIC_SUFFIXES.contains(remainder)) {
                return candidate;
            }
            candidate = remainder;
            dot = candidate.indexOf('.');
        }

        // Unknown suffix: keep the last two labels rather than the bare last
        // label, so unrelated hosts sharing a TLD are not treated as one domain.
        int lastDot = host.lastIndexOf('.');
        if (lastDot < 0) {
            return host;
        }
        int secondLastDot = host.lastIndexOf('.', lastDot - 1);
        return host.substring(secondLastDot + 1);
    }

    /**
     * Returns the domain name of the given URL string.
     *
     * @param url the URL text to parse
     * @return the lower-case domain name, see {@link #getDomainName(URL)}
     * @throws MalformedURLException if {@code url} cannot be parsed as a URL
     */
    public static String getDomainName(String url) throws MalformedURLException {
        return getDomainName(new URL(url));
    }

    /**
     * Tells whether two URLs share the same domain name.
     *
     * @param url1 the first URL
     * @param url2 the second URL
     * @return {@code true} if both URLs resolve to the same domain name, ignoring case
     */
    public static boolean isSameDomainName(URL url1, URL url2) {
        return getDomainName(url1).equalsIgnoreCase(getDomainName(url2));
    }

    /**
     * Tells whether two URL strings share the same domain name.
     *
     * @param url1 the first URL text
     * @param url2 the second URL text
     * @return {@code true} if both URLs resolve to the same domain name, ignoring case
     * @throws MalformedURLException if either argument cannot be parsed as a URL
     */
    public static boolean isSameDomainName(String url1, String url2)
            throws MalformedURLException {
        return isSameDomainName(new URL(url1), new URL(url2));
    }

    /** Small demo: prints the domain name of a sample URL via both overloads. */
    public static void main(String[] args) throws Exception {
        String urlStr = "http://news.hexun.com/2017-09-23/190978248.html";
        System.out.println(getDomainName(urlStr));
        System.out.println(getDomainName(new URL(urlStr)));
    }

}
方式二:
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Locale;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Regex-based extraction of the domain name (one label plus a known suffix)
 * from a URL string.
 */
public class DomainUtils {

    private static final Logger LOG = Logger.getLogger(DomainUtils.class.getName());

    /**
     * One label followed by a known suffix, anchored to the END of the host.
     * Without the anchor a suffix could match inside a longer label
     * (".net" inside ".netflix", ".cn" inside ".cnblogs").
     * The last three alternatives are Chinese-script TLDs, written as Unicode
     * escapes so the source compiles regardless of the file encoding.
     */
    private static final Pattern TOP_DOMAIN_PATTERN = Pattern.compile(
            "[^\\.]+(\\.com\\.cn|\\.net\\.cn|\\.org\\.cn|\\.gov\\.cn|\\.com|\\.net|\\.cn|\\.org|\\.cc|\\.me"
            + "|\\.tel|\\.mobi|\\.asia|\\.biz|\\.info|\\.name|\\.tv|\\.hk"
            + "|\\.\u516c\u53f8|\\.\u4e2d\u570b|\\.\u7db2\u7d61)$");

    /**
     * Returns the domain name of the given URL, for example {@code hexun.com}
     * for {@code http://news.hexun.com/2017-09-23/190978248.html}.
     *
     * <p>The host is lower-cased and a single trailing root dot is removed
     * before matching.
     *
     * @param url the URL text to inspect
     * @return the lower-case domain name, or {@code null} if the URL is
     *         malformed or its host does not end in one of the known suffixes
     */
    public static String getTopDomain(String url) {
        try {
            // Locale.ROOT keeps the result independent of the default locale.
            String host = new URL(url).getHost().toLowerCase(Locale.ROOT);
            if (host.endsWith(".")) {
                host = host.substring(0, host.length() - 1);
            }
            // The leftmost anchored match is the one with the longest suffix.
            Matcher matcher = TOP_DOMAIN_PATTERN.matcher(host);
            if (matcher.find()) {
                return matcher.group();
            }
        } catch (MalformedURLException e) {
            // Best-effort API: report the problem and fall through to null.
            LOG.log(Level.WARNING, "Cannot parse URL: " + url, e);
        }
        return null;
    }

    /** Small demo: prints the domain name of a sample URL. */
    public static void main(String[] args) {
        System.out.println(getTopDomain("http://news.hexun.com/2017-09-23/190978248.html")); // hexun.com
    }
}