个人小站

[JavaScript]中文等字符转化为 Unicode(UTF-16 LE)

字数统计: 919 | 阅读时长: 5 min
2021/08/07

嘛,挺不错的。
原文:http://blog.csdn.net/geovindu/article/details/9119725

利用的是 JavaScript 的 charCodeAt() 方法:
http://www.w3school.com.cn/jsref/jsref_charCodeAt.asp
相关函数还有unescape() 以及escape()
网页上的 Unicode 码常见的形式除了“\uXXXX”还有“&#xXXXX;”,其中“&#xXXXX;”是 16 进制,“&#XXXX;”为 10 进制。





This encoding utility requires JavaScript.

源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
<!-- Mode selection: every radio button re-runs refresh() so the output follows the chosen converter. -->
<input id="i0" name="lang" onclick="refresh()" type="radio" checked/>
<label for="i0" title="\u00FF only">Java String Encoder</label>
<input id="i1" name="lang" onclick="refresh()" type="radio" />
<label for="i1" title="\xFF and \u0100">JavaScript String Encoder</label>
<input id="i2" name="lang" onclick="refresh()" type="radio" />
<label for="i2" title="\xFF and \u0100">Decoder</label>
<div>
<!-- Input area. `oninput` also fires for mouse paste, cut and drag-and-drop, which `onkeyup` misses; `onkeyup` is kept as a fallback for browsers without `oninput`. -->
<textarea oninput="refresh()" onkeyup="refresh()" rows="8" cols="100">// Paste some Java or JavaScript code into this window.
german = "Übergröße 塘㙍镇";
smilie = "☺";
</textarea>
</div>
<!-- Output area: refresh() overwrites this placeholder text, so it stays visible only when scripts do not run. -->
<pre><code id="result" name="result">This encoding utility requires JavaScript.</code></pre>
<script type="text/javascript">


// Escapes every character outside U+0000..U+007E in `a` as a JavaScript
// string escape, choosing between `\xHH`, `\u0HHH` and `\uHHHH`.
//
// The function doubles as its own `replace` callback: when `replace` invokes
// it, `b` is the match offset (a number, possibly 0), so `++b` is truthy and
// `a` is the single matched character. When called directly with only a
// string, `b` is undefined, `++b` is NaN, and the whole string is processed.
var encodeJavaScriptString = function f(a, b)
{
    if (!(++b)) {
        // Direct call: escape everything except U+0000 to U+007E.
        return a.replace(/[^\0-~]/g, f);
    }

    // Callback call: `a` is one matched character; work on its UTF-16 code unit.
    var code = a.charCodeAt();
    var prefix;
    if (code >> 12) {
        prefix = 'u';   // U+1000 and above: the hex string already has four digits
    } else if (code >> 8) {
        prefix = 'u0';  // U+0100 to U+0FFF: pad the three hex digits to four
    } else {
        prefix = 'x';   // U+007F to U+00FF (as selected by the regex): `\x` plus two hex digits
    }
    return '\\' + prefix + code.toString(16).toUpperCase();
};
// Escapes every character outside U+0000..U+007E in `a` as a Java-style
// `\uHHHH` escape (always exactly four upper-case hex digits).
//
// The function doubles as its own `replace` callback: when `replace` invokes
// it, `b` is the match offset (a number, possibly 0), so `++b` is truthy and
// `a` is the single matched character. When called directly with only a
// string, `b` is undefined, `++b` is NaN, and the whole string is processed.
var encodeJavaString = function e(a, b)
{
    if (!(++b)) {
        // Direct call: escape everything except U+0000 to U+007E.
        return a.replace(/[^\0-~]/g, e);
    }

    // Callback call: `a` is one matched character; work on its UTF-16 code unit.
    var hex = a.charCodeAt().toString(16).toUpperCase();
    // Three padding zeros are needed so that even a one-digit code unit
    // (below U+0010) still yields four hex digits after slicing.
    return '\\u' + ('000' + hex).slice(-4);
};
// Decodes escape sequences in `str` back into the characters they denote.
//
// Recognised forms (all matched case-insensitively):
//   \uHHHH    - four hex digits
//   \xHH      - two hex digits, as emitted by encodeJavaScriptString
//   &#xHHHH;  - HTML hexadecimal character reference with four hex digits
//
// Only genuine hex digits are accepted, so text such as `C:\users` is left
// untouched instead of being turned into a NUL character. The backslash
// escapes are decoded first and the HTML references are decoded on that
// result, the same two-pass order as before.
//
// `b` is unused; it is kept so the signature stays unchanged.
// Returns the decoded string.
var reconvert = function (str, b) {
    var unescaped = str.replace(
        /\\(?:u([0-9a-f]{4})|x([0-9a-f]{2}))/gi,
        function (match, unicodeHex, byteHex) {
            // Exactly one of the two groups is defined, depending on the form matched.
            return String.fromCharCode(parseInt(unicodeHex || byteHex, 16));
        }
    );

    return unescaped.replace(/&#x([0-9a-f]{4});/gi, function (match, hex) {
        return String.fromCharCode(parseInt(hex, 16));
    });
};
// Converts the textarea content with the converter selected by the radio
// buttons and shows the result in the output element.
//
// Radio `i1` selects encodeJavaScriptString, `i0` selects encodeJavaString,
// anything else (i.e. `i2`) selects reconvert.
var refresh = function()
{
    var input = document.getElementsByTagName('TEXTAREA')[0];
    // Prefer the dedicated output element so that another <code> element
    // earlier in the page cannot be overwritten by mistake; fall back to the
    // first <code> element when no element with id "result" exists.
    var output = document.getElementById('result') ||
        document.getElementsByTagName('CODE')[0];

    var convert;
    if (document.getElementById('i1').checked) {
        convert = encodeJavaScriptString;
    } else if (document.getElementById('i0').checked) {
        convert = encodeJavaString;
    } else {
        convert = reconvert;
    }

    // Collapse CRLF pairs to LF before display. `textContent` is assigned
    // (rather than the first child's data) so an empty output element works too.
    output.textContent = convert(input.value).replace(/\r\n/g, '\n');
};
// Run once when the script executes so the output shows the conversion of the textarea's default content.
refresh();
</script>

原文作者:ted423

原文链接:http://ted423.github.io/Code/WWW/2unicode/

发表日期:August 7th 2021, 12:31:25 am

更新日期:January 1st 2015, 12:00:00 am

版权声明:本站原创内容(一般是语句不通顺的那种)采用知识共享署名-非商业性使用 4.0 国际许可协议进行许可,转载内容以及不带个人观点的分享不在此例,摘抄有Wiki的内容的文章统一根据Wiki采用 CC BY-SA 3.0

CATALOG