用Javascript（js）進行HTML轉義工具（處理特殊字符顯示）

本文轉載自查看原文 2016-12-13 16:37 14320 JavaScript

轉自：http://blog.csdn.net/hj7jay/article/details/51280405

眾所周知頁面上的字符內容通常都需要進行HTML轉義才能正確顯示，尤其對於Input，Textarea提交的內容，更是要進行轉義以防止javascript注入攻擊。

通常的HTML轉義主要是針對內容中的"<"，">"，"&"，以及空格、單雙引號等。但其實還有很多字符也需要進行轉義。具體的可以參考這篇文章。

** 1、HTML轉義

參考上面的提到的文章，基本上可以確定以下的轉義的范圍和方式。

1）對"\""、"&"、"'"、"<"、">"、空格(0x20)、0x00到0x20、0x7F-0xFF

以及0x0100-0x2700的字符進行轉義，基本上就覆蓋的比較全面了。

用javascript的正則表達式可以寫為：

// Matches each character to escape: ", &, ', <, >, 0x00-0x20 (which includes the space), 0x7F-0xFF and 0x0100-0x2700.
this.REGX_HTML_ENCODE = /"|&|'|<|>|[\x00-\x20]|[\x7F-\xFF]|[\u0100-\u2700]/g;

2）為保證轉義結果對瀏覽器的無差別，轉義編碼為實體編號，而不用實體名稱。

3）空格(0x20)通常轉義為“&nbsp;”也就是“&#160;”。

轉義的代碼非常簡單：

 
              [javascript]  
              view plain 
              copy
  
               
               
             
/**
 * Escape a string for HTML display using numeric character references.
 *
 * Every character matched by this.REGX_HTML_ENCODE is replaced with
 * "&#<code>;". A plain space (0x20) is emitted as code 160 (0xA0).
 *
 * @param {*} s Value to escape.
 * @returns {*} The escaped string, or `s` itself when it is not a string.
 */
this.encodeHtml = function(s){
    if(typeof s != "string"){
        return s;
    }
    return s.replace(this.REGX_HTML_ENCODE, function(matched){
        var code = matched.charCodeAt(0);
        if(code == 0x20){
            // A space is written as the non-breaking space code.
            code = 0xA0;
        }
        return "&#" + code + ";";
    });
};

** 2、反轉義

既然有轉義，自然需要反轉義。

1）對“&#num;”實體編號的轉義，直接提取編號然后fromCharCode就可以得到字符。

2）對於諸如“&lt;”這樣的實體名稱，需要建立一張如下的表來查詢。

// Lookup table from named HTML entities to the characters they decode to.
// "&nbsp;" maps to a plain space, matching how decodeHtml turns code 160 into 32.
this.HTML_DECODE = {
    "&lt;"   : "<",
    "&gt;"   : ">",
    "&amp;"  : "&",
    "&nbsp;" : " ",
    "&quot;" : "\"",
    "&copy;" : "\u00A9" // copyright sign
    // Add more
};

由此我們可以有反轉義的正則表達式：

// Matches a named entity of the form "&name;" or a decimal numeric entity "&#NNN;" (the digits are captured in group 1).
this.REGX_HTML_DECODE = /&\w+;|&#(\d+);/g;

反轉的代碼也很簡單，如下：

 
               [javascript]  
               view plain 
               copy
  
                
                
              
/**
 * Reverse encodeHtml: turn named and decimal numeric entities back into characters.
 *
 * @param {*} s Value to decode.
 * @returns {*} The decoded string, or `s` itself when it is not a string.
 */
this.decodeHtml = function(s){
    // Capture the table up front: inside the replace callback `this` is not
    // this object, and the table is named HTML_DECODE (not HTML_ENCODE).
    var table = this.HTML_DECODE;
    return (typeof s != "string") ? s :
        s.replace(this.REGX_HTML_DECODE,
                  function($0, $1){
                      var c = table[$0]; // try the named-entity table first
                      if(c === undefined){
                          // Maybe it is an entity number
                          if(!isNaN($1)){
                              // Code 160 is mapped back to a plain space (32).
                              c = String.fromCharCode(($1 == 160) ? 32 : $1);
                          }else{
                              // Not an entity number: leave the text untouched
                              c = $0;
                          }
                      }
                      return c;
                  });
};

** 3、一個有意思的認識

其實在用正則表達式轉義之前，我一直都是用遍歷整個字符串，逐個比較字符的方式。直到有一天，看到一篇文章說，javascript正則表達式是C實現的，比自己用javascript遍歷字符要快，於是我就試着改寫成上面這種方式。雖然代碼看起來的確顯得神秘而又牛叉，但遺憾的是，在我的Chrome 11 (FreeBSD 64 9.0)上，遍歷字符轉義/反轉的方式要比上面正則表達式的代碼快2到3倍（字符串長度越長越明顯）。其實，想想也能明白為什么。

** 4、完整版本的代碼

 
              [javascript]  
              view plain 
              copy
  
               
               
             
$package("js.lang"); // Without a package manager this can simply be written as: js = {lang:{}};

/**
 * Mixin that installs HTML escaping/unescaping, trimming and hashing helpers
 * on the object it is invoked on, e.g. js.lang.String.call(target).
 *
 * Each method accepts the string as an optional argument `s`; when `s` is
 * omitted the method works on `this` instead.
 */
js.lang.String = function(){

    // Characters escaped by encodeHtml: ", &, ', <, >, 0x00-0x20 (including
    // the space), 0x7F-0xFF and 0x0100-0x2700.
    this.REGX_HTML_ENCODE = /"|&|'|<|>|[\x00-\x20]|[\x7F-\xFF]|[\u0100-\u2700]/g;

    // A named entity "&name;" or a decimal entity "&#NNN;" (digits in group 1).
    this.REGX_HTML_DECODE = /&\w+;|&#(\d+);/g;

    // Leading or trailing whitespace.
    this.REGX_TRIM = /(^\s*)|(\s*$)/g;

    // Named entities understood by decodeHtml. "&nbsp;" decodes to a plain
    // space, matching the 160 -> 32 mapping applied to numeric entities.
    this.HTML_DECODE = {
        "&lt;"   : "<",
        "&gt;"   : ">",
        "&amp;"  : "&",
        "&nbsp;" : " ",
        "&quot;" : "\"",
        "&copy;" : "\u00A9" // copyright sign

        // Add more
    };

    /**
     * Escape a string using decimal numeric character references.
     * A space (0x20) is emitted as "&#160;".
     *
     * @param {*} [s] Value to escape; defaults to this.toString().
     * @returns {*} The escaped string, or `s` unchanged when it is not a string.
     */
    this.encodeHtml = function(s){
        s = (s != undefined) ? s : this.toString();
        return (typeof s != "string") ? s :
            s.replace(this.REGX_HTML_ENCODE,
                      function($0){
                          var c = $0.charCodeAt(0);
                          c = (c == 0x20) ? 0xA0 : c;
                          return "&#" + c + ";";
                      });
    };

    /**
     * Decode named entities listed in HTML_DECODE and decimal numeric
     * entities. Unknown names and out-of-range numbers are left untouched.
     *
     * @param {*} [s] Value to decode; defaults to this.toString().
     * @returns {*} The decoded string, or `s` unchanged when it is not a string.
     */
    this.decodeHtml = function(s){
        // Captured here because `this` is not this object inside the callback.
        var HTML_DECODE = this.HTML_DECODE;

        s = (s != undefined) ? s : this.toString();
        return (typeof s != "string") ? s :
            s.replace(this.REGX_HTML_DECODE,
                      function($0, $1){
                          var c = HTML_DECODE[$0], code;
                          if(c == undefined){
                              if($1){
                                  // Entity number
                                  code = Number($1);
                                  if(code == 160){
                                      c = " ";
                                  }else if(code <= 0xFFFF){
                                      c = String.fromCharCode(code);
                                  }else if(code <= 0x10FFFF){
                                      // fromCharCode truncates to 16 bits, so build
                                      // the surrogate pair for astral code points.
                                      code -= 0x10000;
                                      c = String.fromCharCode(0xD800 + (code >> 10),
                                                              0xDC00 + (code & 0x3FF));
                                  }else{
                                      // Beyond the Unicode range: keep the raw text
                                      c = $0;
                                  }
                              }else{
                                  // Unknown named entity: keep the raw text
                                  c = $0;
                              }
                          }
                          return c;
                      });
    };

    /**
     * Strip leading and trailing whitespace.
     *
     * @param {*} [s] Value to trim; defaults to this.toString().
     * @returns {*} The trimmed string, or `s` unchanged when it is not a string.
     */
    this.trim = function(s){
        s = (s != undefined) ? s : this.toString();
        return (typeof s != "string") ? s :
            s.replace(this.REGX_TRIM, "");
    };

    /**
     * Compute a non-negative 31-based rolling hash over the UTF-16 code units.
     *
     * @param {*} [s] Value to hash (converted with String()); defaults to `this`.
     * @returns {number} Hash in the range 0..0x7fffffff.
     */
    this.hashCode = function(s){
        var explicit = (s != undefined);
        // The cache can only live on a real object. In strict mode `this` may be
        // a primitive string, and assigning a property to it throws a TypeError.
        var cacheable = !explicit && (typeof this == "object" || typeof this == "function");
        var str = explicit ? String(s) : this;
        var hash = cacheable ? this.__hash__ : undefined;
        var i, len;
        if(hash == undefined || hash == 0){
            hash = 0;
            for(i = 0, len = str.length; i < len; i++){
                hash = (31*hash + str.charCodeAt(i)) | 0; // wrap to a 32-bit integer
            }
            hash = hash & 0x7fffffff; // clear the sign bit
            if(cacheable){
                this.__hash__ = hash;
            }
        }

        return hash;
    };

};

// Install the helpers on js.lang.String itself so they can be called as
// js.lang.String.encodeHtml(s), js.lang.String.decodeHtml(s), and so on.
js.lang.String.call(js.lang.String);

在實際的使用中可以有兩種方式：

1）使用js.lang.String.encodeHtml(s)和js.lang.String.decodeHtml(s)。

2）還可以直接擴展String的prototype

 
             [javascript]  
             view plain 
             copy
  
// Install the helpers on String.prototype so they can be called as string methods.
js.lang.String.call(String.prototype);

// Then:

var str = "&'\"中國abc def";

var ec_str = str.encodeHtml();

document.write(ec_str);

// The CU blog's online editor has a bug, so this could not be posted!!!
// NOTE(review): the empty string presumably stood for separator markup that was lost - confirm.
document.write("");

var dc_str = ec_str.decodeHtml();

document.write(dc_str);

免責聲明！

本站轉載的文章為個人學習借鑒使用，本站對版權不負任何法律責任。如果侵犯了您的隱私權益，請聯系本站郵箱yoyou2525@163.com刪除。

猜您在找 js如何轉義和反轉義html特殊字符 js特殊字符轉義 HTML轉義工具 [Javascript版] js特殊字符轉義 html.特殊字符（字符轉義）特殊字符html,css轉義大全 java 實現html特殊字符轉義 html特殊字符轉義問題（轉！） Solr特殊字符轉義處理利用StringEscapeUtils來轉義和反轉義html/xml/javascript中的特殊字符