wrs-ASCIIUtils.js 7.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. import * as HexUtils from './wrs-HexUtils.js';
  /**
   * Encode a string as UTF-8 and return the bytes as a plain array of numbers.
   *
   * The input is coerced with `String()` and walked code point by code point,
   * so surrogate pairs become a single 4-byte sequence. An unpaired surrogate
   * cannot be represented in UTF-8; it is encoded as U+FFFD (EF BF BD), the
   * same substitution `TextEncoder` performs, instead of throwing.
   *
   * `TextEncoder` itself is not used because, per the original author's note,
   * it is unavailable on the Android runtime this module targets.
   *
   * @param {string} str - Text to encode (non-strings are converted to string).
   * @returns {number[]} UTF-8 byte values (0-255), one array entry per byte.
   */
  export function encodeUtf8(str) {
    const REPLACEMENT_CODE_POINT = 0xfffd;
    const bytes = [];

    // Iterating a string yields whole code points; a lone surrogate comes
    // through as a single UTF-16 unit in the range 0xD800-0xDFFF.
    for (const ch of String(str)) {
      let codePoint = ch.codePointAt(0);
      if (codePoint >= 0xd800 && codePoint <= 0xdfff) {
        codePoint = REPLACEMENT_CODE_POINT;
      }

      if (codePoint < 0x80) {
        // 1 byte: 0xxxxxxx
        bytes.push(codePoint);
      } else if (codePoint < 0x800) {
        // 2 bytes: 110xxxxx 10xxxxxx
        bytes.push(0xc0 | (codePoint >> 6), 0x80 | (codePoint & 0x3f));
      } else if (codePoint < 0x10000) {
        // 3 bytes: 1110xxxx 10xxxxxx 10xxxxxx
        bytes.push(
          0xe0 | (codePoint >> 12),
          0x80 | ((codePoint >> 6) & 0x3f),
          0x80 | (codePoint & 0x3f),
        );
      } else {
        // 4 bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
        bytes.push(
          0xf0 | (codePoint >> 18),
          0x80 | ((codePoint >> 12) & 0x3f),
          0x80 | ((codePoint >> 6) & 0x3f),
          0x80 | (codePoint & 0x3f),
        );
      }
    }

    return bytes;
  }
  /**
   * Decode UTF-8 bytes into a JavaScript string.
   *
   * `str` is handed to `HexUtils.hexToUint8Array` to obtain the bytes
   * (presumably a hex-encoded string - confirm against wrs-HexUtils.js).
   *
   * Handling of imperfect input:
   * - Up to three continuation bytes at the very start are each replaced by
   *   U+FFFD (the input may begin in the middle of a character).
   * - A multi-byte sequence cut off by the end of the input yields one U+FFFD
   *   per byte that is present.
   * - Any other malformed input throws: unexpected leading byte, missing
   *   continuation byte, overlong encoding, or a value above U+10FFFF.
   *
   * Surrogate code points (U+D800-U+DFFF) encoded as 3-byte sequences are not
   * rejected.
   *
   * @param {string} str - Input forwarded to `HexUtils.hexToUint8Array`.
   * @returns {string} The decoded text.
   * @throws {Error} With a message starting "invalid utf-8." on malformed input.
   */
  export function decodeUtf8(str) {
    const REPL_CHAR = 0xFFFD;
    const MAX_CODE_POINT = 0x10FFFF;
    // String.fromCodePoint receives its input as call arguments; spreading an
    // unbounded array overflows the engine's argument limit, so convert in slices.
    const CHUNK_SIZE = 0x2000;
    const hex = (x) => x.toString(16).padStart(2, '0');
    const xs = Array.from(HexUtils.hexToUint8Array(str));
    const res = [];

    // Returns the 6 payload bits of the continuation byte at `index`.
    const continuationBits = (index) => {
      const b = xs[index];
      if ((b & 0xC0) !== 0x80) {
        throw new Error(`invalid utf-8. Expected a continuation byte at index ${index} actual ${hex(b)}`);
      }
      return b & 0x3F;
    };

    let i = 0;
    while (i < xs.length && i < 3 && (xs[i] & 0xC0) === 0x80) {
      res.push(REPL_CHAR); // replacement for a stray leading continuation byte
      i++;
    }

    for (; i < xs.length; i++) {
      const x = xs[i];
      if ((x & 0x80) === 0) {
        // 1 byte
        res.push(x);
        continue;
      }

      // Number of continuation bytes announced by the leading byte.
      let extra;
      if ((x & 0xE0) === 0xC0) {
        extra = 1;
      } else if ((x & 0xF0) === 0xE0) {
        extra = 2;
      } else if ((x & 0xF8) === 0xF0) {
        extra = 3;
      } else {
        throw new Error(`invalid utf-8. Expected a leading byte at index ${i} actual ${hex(x)}`);
      }

      if (i + extra >= xs.length) {
        // Sequence runs past the end: one replacement per byte actually present.
        for (let k = i; k < xs.length; k++) {
          res.push(REPL_CHAR);
        }
        break;
      }

      // Leading-byte payload is 5, 4 or 3 bits for 2-, 3- and 4-byte sequences.
      let c = x & (0x3F >> extra);
      for (let k = 1; k <= extra; k++) {
        c = (c << 6) | continuationBits(i + k);
      }

      // Reject overlong encodings (the upper bounds of the 2- and 3-byte forms
      // cannot be exceeded with 11 and 16 payload bits) and out-of-range values.
      if (extra === 1 && c < 0x80) {
        throw new Error(`invalid utf-8. Expected an integer between 0x80 and 0x800 at index ${i} actual ${c}`);
      }
      if (extra === 2 && c < 0x800) {
        throw new Error(
          `invalid utf-8. Expected an integer between 0x800 and 0x10000 at index ${i} actual ${c}`);
      }
      if (extra === 3) {
        if (c < 0x10000) {
          throw new Error(`invalid utf-8. Expected an integer above 0x10000 at index ${i} actual ${c}`);
        }
        if (c > MAX_CODE_POINT) {
          throw new Error(`invalid utf-8. Expected an integer at most 0x10FFFF at index ${i} actual ${c}`);
        }
      }

      res.push(c);
      i += extra; // the loop increment then steps past the last continuation byte
    }

    let decoded = '';
    for (let start = 0; start < res.length; start += CHUNK_SIZE) {
      decoded += String.fromCodePoint(...res.slice(start, start + CHUNK_SIZE));
    }
    return decoded;
  }
  214. // module.exports = {
  215. // encodeUtf8: encodeUtf8,
  216. // decodeUtf8: decodeUtf8
  217. // }
  /**
   * Left-pad `str` with `prefix` and keep only the last `len` characters.
   *
   * Unlike `String.prototype.padStart`, a `str` longer than `len` is cut down
   * to its final `len` characters.
   *
   * @param {string} str - Value to pad.
   * @param {number} len - Width of the result.
   * @param {string|number} prefix - Filler placed in front of `str`.
   * @returns {string} The padded (or truncated) text.
   */
  function padStart(str, len, prefix) {
    // Joining len + 1 empty slots emits `prefix` exactly `len` times.
    const filler = new Array(len + 1).join(prefix);
    const widened = filler + str;
    return widened.slice(-len);
  }