57
node_modules/@exodus/bytes/fallback/_utils.js
generated
vendored
Normal file
57
node_modules/@exodus/bytes/fallback/_utils.js
generated
vendored
Normal file
@@ -0,0 +1,57 @@
|
||||
// Re-export platform detection helpers so consumers can import everything from here
export * from './platform.js'

// Capture Buffer from the global object (present in Node.js, may be undefined elsewhere).
// The @__PURE__ annotation marks the IIFE as side-effect-free so bundlers can drop it if unused.
const Buffer = /* @__PURE__ */ (() => globalThis.Buffer)()
|
||||
/**
 * Throw an Error with the supplied message unless the condition holds.
 * @param {*} condition - value treated as a boolean
 * @param {string} msg - error message used when the condition is falsy
 */
export function assert(condition, msg) {
  if (condition) return
  throw new Error(msg)
}
||||
/**
 * Throw a TypeError unless the argument is a Uint8Array (or a subclass instance).
 * @param {*} arg - value to validate
 */
export function assertU8(arg) {
  if (arg instanceof Uint8Array) return
  throw new TypeError('Expected an Uint8Array')
}
||||
// On arrays in heap (<= 64) it's cheaper to copy into a pooled buffer than lazy-create the ArrayBuffer storage
/**
 * Wrap a typed-array view as a Buffer over the same bytes; small 1-byte views get copied instead.
 * @param {ArrayBufferView} x - typed array to convert
 * @returns {Buffer}
 */
export const toBuf = (x) => {
  const smallByteView = x.byteLength <= 64 && x.BYTES_PER_ELEMENT === 1
  if (smallByteView) return Buffer.from(x) // pooled copy
  return Buffer.from(x.buffer, x.byteOffset, x.byteLength) // zero-copy view
}
||||
// Shared error messages used by the string codecs in this package
export const E_STRING = 'Input is not a string'
export const E_STRICT_UNICODE = 'Input is not well-formed Unicode'
|
||||
// Input is never pooled
/**
 * Convert a non-pooled Uint8Array into the requested output format.
 * @param {Uint8Array} arr - input bytes (must own its whole buffer for 'arraybuffer')
 * @param {'uint8'|'arraybuffer'|'buffer'} format - requested output representation
 * @returns {Uint8Array|ArrayBuffer|Buffer}
 * @throws {Error} if the input shape contradicts the requested format
 * @throws {TypeError} on an unknown format
 */
export function fromUint8(arr, format) {
  if (format === 'uint8') {
    if (arr.constructor !== Uint8Array) throw new Error('Unexpected')
    return arr
  }

  if (format === 'arraybuffer') {
    // The view must cover the whole buffer, otherwise returning it would leak/garble data
    if (arr.byteLength !== arr.buffer.byteLength) throw new Error('Unexpected')
    return arr.buffer
  }

  if (format === 'buffer') {
    // Small arrays: copying into the Buffer pool is cheaper than creating a view
    if (arr.length <= 64) return Buffer.from(arr)
    return Buffer.from(arr.buffer, arr.byteOffset, arr.byteLength)
  }

  throw new TypeError('Unexpected format')
}
|
||||
// Input can be pooled
/**
 * Convert a (possibly pooled) Buffer into the requested output format,
 * copying when needed so pooled storage is never exposed.
 * @param {Buffer} arr - input bytes
 * @param {'uint8'|'arraybuffer'|'buffer'} format - requested output representation
 * @returns {Uint8Array|ArrayBuffer|Buffer}
 * @throws {Error} if 'buffer' is requested but the input is not a Buffer instance
 * @throws {TypeError} on an unknown format
 */
export function fromBuffer(arr, format) {
  if (format === 'buffer') {
    if (arr.constructor !== Buffer) throw new Error('Unexpected')
    return arr
  }

  if (format === 'uint8') {
    // byteOffset check is slightly faster and covers most pooling, so it comes first
    if (arr.length <= 64 || arr.byteOffset !== 0 || arr.byteLength !== arr.buffer.byteLength) {
      return new Uint8Array(arr) // copy out of the pool (or cheap small copy)
    }

    // Whole-buffer, unpooled: a zero-copy view is safe
    return new Uint8Array(arr.buffer, arr.byteOffset, arr.byteLength)
  }

  if (format === 'arraybuffer') return fromBuffer(arr, 'uint8').buffer

  throw new TypeError('Unexpected format')
}
242
node_modules/@exodus/bytes/fallback/base32.js
generated
vendored
Normal file
242
node_modules/@exodus/bytes/fallback/base32.js
generated
vendored
Normal file
@@ -0,0 +1,242 @@
|
||||
import { assertU8 } from './_utils.js'
|
||||
import { nativeEncoder, nativeDecoder, isHermes } from './platform.js'
|
||||
import { encodeAscii, decodeAscii } from './latin1.js'
|
||||
|
||||
// See https://datatracker.ietf.org/doc/html/rfc4648

// Per-alphabet caches of lazily built lookup tables; index matches BASE32_ALPHABETS
const BASE32_HELPERS = [{}, {}, {}]
const BASE32_ALPHABETS = [
  [...'ABCDEFGHIJKLMNOPQRSTUVWXYZ234567'], // RFC 4648, #6
  [...'0123456789ABCDEFGHIJKLMNOPQRSTUV'], // RFC 4648, #7
  [...'0123456789ABCDEFGHJKMNPQRSTVWXYZ'], // Crockford, base (see extra below in fromMap)
]

// Error messages thrown by toBase32 / fromBase32
export const E_CHAR = 'Invalid character in base32 input'
export const E_PADDING = 'Invalid base32 padding'
export const E_LENGTH = 'Invalid base32 length'
export const E_LAST = 'Invalid last chunk'

const useTemplates = isHermes // Faster on Hermes and JSC, but we use it only on Hermes
|
||||
|
||||
// We construct output by concatenating chars, this seems to be fine enough on modern JS engines
/**
 * Encode bytes to a base32 string.
 * @param {Uint8Array} arr - bytes to encode
 * @param {number} mode - alphabet index into BASE32_ALPHABETS (0 = RFC 4648 base32, 1 = base32hex, 2 = Crockford)
 * @param {boolean} padding - when true, append '=' padding up to a multiple of 8 chars
 * @returns {string}
 */
export function toBase32(arr, mode, padding) {
  assertU8(arr)
  // Each complete 5-byte chunk encodes to exactly 8 chars (handled as 4 two-char pairs)
  const fullChunks = Math.floor(arr.length / 5)
  const fullChunksBytes = fullChunks * 5
  let o = ''
  let i = 0

  const alphabet = BASE32_ALPHABETS[mode]
  const helpers = BASE32_HELPERS[mode]
  if (!helpers.pairs) {
    helpers.pairs = []
    if (nativeDecoder) {
      // Lazy to save memory in case if this is not needed
      // codepairs[x] holds the two char codes encoding the 10-bit value x
      helpers.codepairs = new Uint16Array(32 * 32)
      const u16 = helpers.codepairs
      const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
      for (let i = 0; i < 32; i++) {
        const ic = alphabet[i].charCodeAt(0)
        for (let j = 0; j < 32; j++) u8[(i << 6) | (j << 1)] = u8[(j << 6) | ((i << 1) + 1)] = ic
      }
    } else {
      // pairs[x] is the two-char string encoding the 10-bit value x
      const p = helpers.pairs
      for (let i = 0; i < 32; i++) {
        for (let j = 0; j < 32; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
      }
    }
  }

  const { pairs, codepairs } = helpers

  // Fast path for complete blocks
  // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
  if (nativeDecoder) {
    // Build u16 char-code pairs, then convert to a string once via decodeAscii
    const oa = new Uint16Array(fullChunks * 4)
    for (let j = 0; i < fullChunksBytes; i += 5) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      const d = arr[i + 3]
      const e = arr[i + 4]
      // Split the 40 input bits into 4 x 10-bit values, each mapping to one char pair
      const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
      const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
      const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
      const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
      oa[j] = codepairs[x0]
      oa[j + 1] = codepairs[x1]
      oa[j + 2] = codepairs[x2]
      oa[j + 3] = codepairs[x3]
      j += 4
    }

    o = decodeAscii(oa)
  } else if (useTemplates) {
    // Templates are faster only on Hermes and JSC. Browsers have TextDecoder anyway
    for (; i < fullChunksBytes; i += 5) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      const d = arr[i + 3]
      const e = arr[i + 4]
      const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
      const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
      const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
      const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
      o += `${pairs[x0]}${pairs[x1]}${pairs[x2]}${pairs[x3]}`
    }
  } else {
    for (; i < fullChunksBytes; i += 5) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      const d = arr[i + 3]
      const e = arr[i + 4]
      const x0 = (a << 2) | (b >> 6) // 8 + 8 - 5 - 5 = 6 left
      const x1 = ((b & 0x3f) << 4) | (c >> 4) // 6 + 8 - 5 - 5 = 4 left
      const x2 = ((c & 0xf) << 6) | (d >> 2) // 4 + 8 - 5 - 5 = 2 left
      const x3 = ((d & 0x3) << 8) | e // 2 + 8 - 5 - 5 = 0 left
      o += pairs[x0]
      o += pairs[x1]
      o += pairs[x2]
      o += pairs[x3]
    }
  }

  // If we have something left, process it with a full algo
  let carry = 0
  let shift = 3 // First byte needs to be shifted by 3 to get 5 bits
  for (; i < arr.length; i++) {
    const x = arr[i]
    o += alphabet[carry | (x >> shift)] // shift >= 3, so this fits
    if (shift >= 5) {
      // This byte still holds a complete second 5-bit group
      shift -= 5
      o += alphabet[(x >> shift) & 0x1f]
    }

    carry = (x << (5 - shift)) & 0x1f
    shift += 3 // Each byte prints 5 bits and leaves 3 bits
  }

  if (shift !== 3) o += alphabet[carry] // shift 3 means we have no carry left
  // Padding lookup is indexed by the number of leftover input bytes (0-4)
  if (padding) o += ['', '======', '====', '===', '='][arr.length - fullChunksBytes]

  return o
}
|
||||
|
||||
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
const mapSize = nativeEncoder ? 128 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array

/**
 * Decode a base32 string to bytes. Accepts both upper- and lowercase input.
 * @param {string} str - base32 text, optionally '='-padded
 * @param {number} mode - alphabet index into BASE32_ALPHABETS (0 = RFC 4648 base32, 1 = base32hex, 2 = Crockford)
 * @returns {Uint8Array}
 * @throws {SyntaxError} on invalid characters, length, padding, or non-zero leftover bits in the last chunk
 */
export function fromBase32(str, mode) {
  // Strip trailing '=' padding, remembering how much there was
  let inputLength = str.length
  while (str[inputLength - 1] === '=') inputLength--
  const paddingLength = str.length - inputLength
  const tailLength = inputLength % 8
  const mainLength = inputLength - tailLength // multiples of 8
  if (![0, 2, 4, 5, 7].includes(tailLength)) throw new SyntaxError(E_LENGTH) // fast verification
  if (paddingLength > 7 || (paddingLength !== 0 && str.length % 8 !== 0)) {
    throw new SyntaxError(E_PADDING)
  }

  const alphabet = BASE32_ALPHABETS[mode]
  const helpers = BASE32_HELPERS[mode]

  if (!helpers.fromMap) {
    helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
    const m = helpers.fromMap
    alphabet.forEach((c, i) => {
      // Map both the canonical char and its lowercase form (decoding is case-insensitive)
      m[c.charCodeAt(0)] = m[c.toLowerCase().charCodeAt(0)] = i
    })

    if (mode === 2) {
      // Extra Crockford mapping
      m[73] = m[76] = m[105] = m[108] = m[49] // ILil -> 1
      m[79] = m[111] = m[48] // Oo -> 0
    }
  }

  const m = helpers.fromMap

  const arr = new Uint8Array(Math.floor((inputLength * 5) / 8))
  let at = 0
  let i = 0

  if (nativeEncoder) {
    const codes = encodeAscii(str, E_CHAR)
    for (; i < mainLength; i += 8) {
      // each 5 bits, grouped 5 * 4 = 20
      const x0 = codes[i]
      const x1 = codes[i + 1]
      const x2 = codes[i + 2]
      const x3 = codes[i + 3]
      const x4 = codes[i + 4]
      const x5 = codes[i + 5]
      const x6 = codes[i + 6]
      const x7 = codes[i + 7]
      // Invalid chars map to -1, which keeps the packed value negative — checked below
      const a = (m[x0] << 15) | (m[x1] << 10) | (m[x2] << 5) | m[x3]
      const b = (m[x4] << 15) | (m[x5] << 10) | (m[x6] << 5) | m[x7]
      if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
      // Unpack 40 bits (a:20 high, b:20 low) into 5 output bytes
      arr[at] = a >> 12
      arr[at + 1] = (a >> 4) & 0xff
      arr[at + 2] = ((a << 4) & 0xff) | (b >> 16)
      arr[at + 3] = (b >> 8) & 0xff
      arr[at + 4] = b & 0xff
      at += 5
    }
  } else {
    for (; i < mainLength; i += 8) {
      // each 5 bits, grouped 5 * 4 = 20
      const x0 = str.charCodeAt(i)
      const x1 = str.charCodeAt(i + 1)
      const x2 = str.charCodeAt(i + 2)
      const x3 = str.charCodeAt(i + 3)
      const x4 = str.charCodeAt(i + 4)
      const x5 = str.charCodeAt(i + 5)
      const x6 = str.charCodeAt(i + 6)
      const x7 = str.charCodeAt(i + 7)
      const a = (m[x0] << 15) | (m[x1] << 10) | (m[x2] << 5) | m[x3]
      const b = (m[x4] << 15) | (m[x5] << 10) | (m[x6] << 5) | m[x7]
      if (a < 0 || b < 0) throw new SyntaxError(E_CHAR)
      arr[at] = a >> 12
      arr[at + 1] = (a >> 4) & 0xff
      arr[at + 2] = ((a << 4) & 0xff) | (b >> 16)
      arr[at + 3] = (b >> 8) & 0xff
      arr[at + 4] = b & 0xff
      at += 5
    }
  }

  // Last block, valid tailLength: 0 2 4 5 7, checked already
  // We check last chunk to be strict
  if (tailLength < 2) return arr
  const ab = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  if (ab < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ab >> 2
  if (tailLength < 4) {
    if (ab & 0x3) throw new SyntaxError(E_LAST) // leftover bits must be zero
    return arr
  }

  const cd = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  if (cd < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((ab << 6) & 0xff) | (cd >> 4)
  if (tailLength < 5) {
    if (cd & 0xf) throw new SyntaxError(E_LAST) // leftover bits must be zero
    return arr
  }

  const e = m[str.charCodeAt(i++)]
  if (e < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((cd << 4) & 0xff) | (e >> 1) // 4 + 4
  if (tailLength < 7) {
    if (e & 0x1) throw new SyntaxError(E_LAST) // leftover bit must be zero
    return arr
  }

  const fg = (m[str.charCodeAt(i++)] << 5) | m[str.charCodeAt(i++)]
  if (fg < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((e << 7) & 0xff) | (fg >> 3) // 1 + 5 + 2
  // Can't be 8, so no h
  if (fg & 0x7) throw new SyntaxError(E_LAST)
  return arr
}
|
||||
52
node_modules/@exodus/bytes/fallback/base58check.js
generated
vendored
Normal file
52
node_modules/@exodus/bytes/fallback/base58check.js
generated
vendored
Normal file
@@ -0,0 +1,52 @@
|
||||
import { toBase58, fromBase58 } from '@exodus/bytes/base58.js'
|
||||
import { assertU8, fromUint8 } from './_utils.js'
|
||||
|
||||
const E_CHECKSUM = 'Invalid checksum'

// checksum length is 4, i.e. only the first 4 bytes of the hash are used

/**
 * Append the first 4 checksum bytes to the payload and base58-encode the result.
 * @param {Uint8Array} arr - payload bytes
 * @param {Uint8Array} checksum - hash output, only the first 4 bytes are used
 * @returns {string}
 */
function encodeWithChecksum(arr, checksum) {
  // arr type in already validated in input
  const payloadWithChecksum = new Uint8Array(arr.length + 4)
  payloadWithChecksum.set(arr, 0)
  payloadWithChecksum.set(checksum.slice(0, 4), arr.length)
  return toBase58(payloadWithChecksum)
}
|
||||
|
||||
/**
 * Base58-decode a string and split off the trailing 4-byte checksum.
 * @param {string} str - base58check text
 * @returns {[Uint8Array, Uint8Array]} [payload, checksum]
 * @throws {Error} when the decoded data is too short to contain a checksum
 */
function decodeWithChecksum(str) {
  const decoded = fromBase58(str) // checks input
  if (decoded.length < 4) throw new Error(E_CHECKSUM)
  const split = decoded.length - 4
  return [decoded.slice(0, split), decoded.slice(split)]
}
|
||||
|
||||
/**
 * Throw unless the first 4 bytes of c and r match (XOR-combined, branchless compare).
 * @param {Uint8Array} c - checksum taken from the input
 * @param {Uint8Array} r - checksum recomputed from the payload
 */
function assertChecksum(c, r) {
  const diff = (c[0] ^ r[0]) | (c[1] ^ r[1]) | (c[2] ^ r[2]) | (c[3] ^ r[3])
  if (diff !== 0) throw new Error('Invalid checksum') // same message as E_CHECKSUM
}
|
||||
|
||||
/**
 * Build a base58check codec around the supplied hash implementations.
 * @param {(u8: Uint8Array) => Promise<Uint8Array>} hashAlgo - async hash (e.g. double SHA-256)
 * @param {((u8: Uint8Array) => Uint8Array)=} hashAlgoSync - optional sync hash; enables *Sync methods
 * @returns {{ encode, decode, encodeSync?, decodeSync? }}
 */
export const makeBase58check = (hashAlgo, hashAlgoSync) => {
  const asyncApi = {
    async encode(arr) {
      assertU8(arr)
      const hash = await hashAlgo(arr)
      return encodeWithChecksum(arr, hash)
    },
    async decode(str, format = 'uint8') {
      const [payload, checksum] = decodeWithChecksum(str)
      assertChecksum(checksum, await hashAlgo(payload))
      return fromUint8(payload, format)
    },
  }

  // Sync variants are exposed only when a sync hash was provided
  if (!hashAlgoSync) return asyncApi

  return {
    ...asyncApi,
    encodeSync(arr) {
      assertU8(arr)
      return encodeWithChecksum(arr, hashAlgoSync(arr))
    },
    decodeSync(str, format = 'uint8') {
      const [payload, checksum] = decodeWithChecksum(str)
      assertChecksum(checksum, hashAlgoSync(payload))
      return fromUint8(payload, format)
    },
  }
}
|
||||
191
node_modules/@exodus/bytes/fallback/base64.js
generated
vendored
Normal file
191
node_modules/@exodus/bytes/fallback/base64.js
generated
vendored
Normal file
@@ -0,0 +1,191 @@
|
||||
import { nativeEncoder, nativeDecoder } from './platform.js'
|
||||
import { encodeAscii, decodeAscii } from './latin1.js'
|
||||
|
||||
// See https://datatracker.ietf.org/doc/html/rfc4648

// Standard and url-safe alphabets — only the last two chars differ
const BASE64 = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/']
const BASE64URL = [...'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_']
// Per-alphabet caches of lazily built lookup tables
const BASE64_HELPERS = {}
const BASE64URL_HELPERS = {}

// Error messages thrown by toBase64 / fromBase64
export const E_CHAR = 'Invalid character in base64 input'
export const E_PADDING = 'Invalid base64 padding'
export const E_LENGTH = 'Invalid base64 length'
export const E_LAST = 'Invalid last chunk'
|
||||
|
||||
// We construct output by concatenating chars, this seems to be fine enough on modern JS engines
// Expects a checked Uint8Array
/**
 * Encode bytes to a base64 string.
 * @param {Uint8Array} arr - bytes to encode (already validated by the caller)
 * @param {boolean} isURL - use the url-safe alphabet ('-'/'_' instead of '+'/'/')
 * @param {boolean} padding - when true, append '=' padding up to a multiple of 4 chars
 * @returns {string}
 */
export function toBase64(arr, isURL, padding) {
  // Each complete 3-byte chunk encodes to exactly 4 chars (handled as 2 two-char pairs)
  const fullChunks = (arr.length / 3) | 0
  const fullChunksBytes = fullChunks * 3
  let o = ''
  let i = 0

  const alphabet = isURL ? BASE64URL : BASE64
  const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS
  if (!helpers.pairs) {
    helpers.pairs = []
    if (nativeDecoder) {
      // Lazy to save memory in case if this is not needed
      // codepairs[x] holds the two char codes encoding the 12-bit value x
      helpers.codepairs = new Uint16Array(64 * 64)
      const u16 = helpers.codepairs
      const u8 = new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength) // write as 1-byte to ignore BE/LE difference
      for (let i = 0; i < 64; i++) {
        const ic = alphabet[i].charCodeAt(0)
        for (let j = 0; j < 64; j++) u8[(i << 7) | (j << 1)] = u8[(j << 7) | ((i << 1) + 1)] = ic
      }
    } else {
      // pairs[x] is the two-char string encoding the 12-bit value x
      const p = helpers.pairs
      for (let i = 0; i < 64; i++) {
        for (let j = 0; j < 64; j++) p.push(`${alphabet[i]}${alphabet[j]}`)
      }
    }
  }

  const { pairs, codepairs } = helpers

  // Fast path for complete blocks
  // This whole loop can be commented out, the algorithm won't change, it's just an optimization of the next loop
  if (nativeDecoder) {
    const oa = new Uint16Array(fullChunks * 2)
    let j = 0
    // Unrolled x4: 12 input bytes -> 8 u16 char pairs per iteration
    for (const last = arr.length - 11; i < last; i += 12, j += 8) {
      const x0 = arr[i]
      const x1 = arr[i + 1]
      const x2 = arr[i + 2]
      const x3 = arr[i + 3]
      const x4 = arr[i + 4]
      const x5 = arr[i + 5]
      const x6 = arr[i + 6]
      const x7 = arr[i + 7]
      const x8 = arr[i + 8]
      const x9 = arr[i + 9]
      const x10 = arr[i + 10]
      const x11 = arr[i + 11]
      oa[j] = codepairs[(x0 << 4) | (x1 >> 4)]
      oa[j + 1] = codepairs[((x1 & 0x0f) << 8) | x2]
      oa[j + 2] = codepairs[(x3 << 4) | (x4 >> 4)]
      oa[j + 3] = codepairs[((x4 & 0x0f) << 8) | x5]
      oa[j + 4] = codepairs[(x6 << 4) | (x7 >> 4)]
      oa[j + 5] = codepairs[((x7 & 0x0f) << 8) | x8]
      oa[j + 6] = codepairs[(x9 << 4) | (x10 >> 4)]
      oa[j + 7] = codepairs[((x10 & 0x0f) << 8) | x11]
    }

    // i < last here is equivalent to i < fullChunksBytes
    for (const last = arr.length - 2; i < last; i += 3, j += 2) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      oa[j] = codepairs[(a << 4) | (b >> 4)]
      oa[j + 1] = codepairs[((b & 0x0f) << 8) | c]
    }

    o = decodeAscii(oa)
  } else {
    // This can be optimized by ~25% with templates on Hermes, but this codepath is not called on Hermes, it uses btoa
    // Check git history for templates version
    for (; i < fullChunksBytes; i += 3) {
      const a = arr[i]
      const b = arr[i + 1]
      const c = arr[i + 2]
      o += pairs[(a << 4) | (b >> 4)]
      o += pairs[((b & 0x0f) << 8) | c]
    }
  }

  // If we have something left, process it with a full algo
  let carry = 0
  let shift = 2 // First byte needs to be shifted by 2 to get 6 bits
  const length = arr.length
  for (; i < length; i++) {
    const x = arr[i]
    o += alphabet[carry | (x >> shift)] // shift >= 2, so this fits
    if (shift === 6) {
      // This byte still holds a complete second 6-bit group
      shift = 0
      o += alphabet[x & 0x3f]
    }

    carry = (x << (6 - shift)) & 0x3f
    shift += 2 // Each byte prints 6 bits and leaves 2 bits
  }

  if (shift !== 2) o += alphabet[carry] // shift 2 means we have no carry left
  // Padding lookup is indexed by the number of leftover input bytes (0-2)
  if (padding) o += ['', '==', '='][length - fullChunksBytes]

  return o
}
|
||||
|
||||
// TODO: can this be optimized? This only affects non-Hermes barebone engines though
const mapSize = nativeEncoder ? 128 : 65_536 // we have to store 64 KiB map or recheck everything if we can't decode to byte array

/**
 * Decode a base64 string to bytes.
 * @param {string} str - base64 text, optionally '='-padded
 * @param {boolean} isURL - expect the url-safe alphabet
 * @returns {Uint8Array}
 * @throws {SyntaxError} on invalid characters, length, padding, or non-zero leftover bits in the last chunk
 */
export function fromBase64(str, isURL) {
  // Strip trailing '=' padding, remembering how much there was
  let inputLength = str.length
  while (str[inputLength - 1] === '=') inputLength--
  const paddingLength = str.length - inputLength
  const tailLength = inputLength % 4
  const mainLength = inputLength - tailLength // multiples of 4
  if (tailLength === 1) throw new SyntaxError(E_LENGTH) // a single leftover char can't encode a whole byte
  if (paddingLength > 3 || (paddingLength !== 0 && str.length % 4 !== 0)) {
    throw new SyntaxError(E_PADDING)
  }

  const alphabet = isURL ? BASE64URL : BASE64
  const helpers = isURL ? BASE64URL_HELPERS : BASE64_HELPERS

  if (!helpers.fromMap) {
    helpers.fromMap = new Int8Array(mapSize).fill(-1) // no regex input validation here, so we map all other bytes to -1 and recheck sign
    alphabet.forEach((c, i) => (helpers.fromMap[c.charCodeAt(0)] = i))
  }

  const m = helpers.fromMap

  const arr = new Uint8Array(Math.floor((inputLength * 3) / 4))
  let at = 0
  let i = 0

  if (nativeEncoder) {
    // E_CHAR is passed so non-ASCII input is reported with the right message
    const codes = encodeAscii(str, E_CHAR)
    for (; i < mainLength; i += 4) {
      const c0 = codes[i]
      const c1 = codes[i + 1]
      const c2 = codes[i + 2]
      const c3 = codes[i + 3]
      // Invalid chars map to -1, which keeps the packed 24-bit value negative — checked below
      const a = (m[c0] << 18) | (m[c1] << 12) | (m[c2] << 6) | m[c3]
      if (a < 0) throw new SyntaxError(E_CHAR)
      // Unpack 24 bits into 3 output bytes
      arr[at] = a >> 16
      arr[at + 1] = (a >> 8) & 0xff
      arr[at + 2] = a & 0xff
      at += 3
    }
  } else {
    for (; i < mainLength; i += 4) {
      const c0 = str.charCodeAt(i)
      const c1 = str.charCodeAt(i + 1)
      const c2 = str.charCodeAt(i + 2)
      const c3 = str.charCodeAt(i + 3)
      const a = (m[c0] << 18) | (m[c1] << 12) | (m[c2] << 6) | m[c3]
      if (a < 0) throw new SyntaxError(E_CHAR)
      arr[at] = a >> 16
      arr[at + 1] = (a >> 8) & 0xff
      arr[at + 2] = a & 0xff
      at += 3
    }
  }

  // Can be 0, 2 or 3, verified by padding checks already
  if (tailLength < 2) return arr // 0
  const ab = (m[str.charCodeAt(i++)] << 6) | m[str.charCodeAt(i++)]
  if (ab < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ab >> 4
  if (tailLength < 3) {
    if (ab & 0xf) throw new SyntaxError(E_LAST) // leftover bits must be zero
    return arr // 2
  }

  const c = m[str.charCodeAt(i++)]
  if (c < 0) throw new SyntaxError(E_CHAR)
  arr[at++] = ((ab << 4) & 0xff) | (c >> 2)
  if (c & 0x3) throw new SyntaxError(E_LAST) // leftover bits must be zero
  return arr // 3
}
|
||||
38
node_modules/@exodus/bytes/fallback/encoding.api.js
generated
vendored
Normal file
38
node_modules/@exodus/bytes/fallback/encoding.api.js
generated
vendored
Normal file
@@ -0,0 +1,38 @@
|
||||
// TODO: make this more strict against Symbol.toStringTag
// Is not very significant though, anything faking Symbol.toStringTag could as well override
// prototypes, which is not something we protect against

/**
 * True for ArrayBuffer or SharedArrayBuffer, including cross-realm instances
 * (detected via the Object.prototype.toString tag).
 * @param {*} x
 * @returns {boolean}
 */
function isAnyArrayBuffer(x) {
  if (x instanceof ArrayBuffer) return true
  const SAB = globalThis.SharedArrayBuffer
  if (SAB && x instanceof SAB) return true
  // Cheap shape check before the slower toString-tag probe
  if (!x || typeof x.byteLength !== 'number') return false
  const tag = Object.prototype.toString.call(x)
  return tag === '[object ArrayBuffer]' || tag === '[object SharedArrayBuffer]'
}
|
||||
|
||||
/**
 * Normalize any buffer source (TypedArray, DataView, ArrayBuffer, SharedArrayBuffer)
 * to a Uint8Array over the same bytes. Detached ArrayBuffers yield an empty array.
 * @param {*} x - buffer source
 * @returns {Uint8Array}
 * @throws {TypeError} when x is not a buffer source
 */
export function fromSource(x) {
  if (x instanceof Uint8Array) return x
  // Any other view (TypedArray/DataView): re-wrap the same byte range
  if (ArrayBuffer.isView(x)) return new Uint8Array(x.buffer, x.byteOffset, x.byteLength)
  if (!isAnyArrayBuffer(x)) {
    throw new TypeError('Argument must be a SharedArrayBuffer, ArrayBuffer or ArrayBufferView')
  }

  if ('detached' in x) return x.detached === true ? new Uint8Array() : new Uint8Array(x)
  // Old engines without .detached, try-catch
  try {
    return new Uint8Array(x)
  } catch {
    return new Uint8Array()
  }
}
|
||||
|
||||
// Warning: unlike whatwg-encoding, returns lowercased labels
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
/**
 * Sniff a byte-order mark at the start of the input.
 * @param {*} input - any buffer source accepted by fromSource
 * @returns {'utf-8'|'utf-16le'|'utf-16be'|null} detected encoding, or null when no BOM
 */
export function getBOMEncoding(input) {
  const u8 = fromSource(input) // asserts
  const len = u8.length
  if (len >= 3 && u8[0] === 0xef && u8[1] === 0xbb && u8[2] === 0xbf) return 'utf-8'
  if (len >= 2) {
    if (u8[0] === 0xff && u8[1] === 0xfe) return 'utf-16le'
    if (u8[0] === 0xfe && u8[1] === 0xff) return 'utf-16be'
  }

  return null
}
|
||||
369
node_modules/@exodus/bytes/fallback/encoding.js
generated
vendored
Normal file
369
node_modules/@exodus/bytes/fallback/encoding.js
generated
vendored
Normal file
@@ -0,0 +1,369 @@
|
||||
// We can't return native TextDecoder if it's present, as Node.js one is broken on windows-1252 and we fix that
|
||||
// We are also faster than Node.js built-in on both TextEncoder and TextDecoder
|
||||
|
||||
import { utf16toString, utf16toStringLoose } from '@exodus/bytes/utf16.js'
|
||||
import { utf8fromStringLoose, utf8toString, utf8toStringLoose } from '@exodus/bytes/utf8.js'
|
||||
import {
|
||||
createSinglebyteDecoder,
|
||||
latin1toString,
|
||||
latin1fromString,
|
||||
} from '@exodus/bytes/single-byte.js'
|
||||
import labels from './encoding.labels.js'
|
||||
import { fromSource, getBOMEncoding } from './encoding.api.js'
|
||||
import { unfinishedBytes, mergePrefix } from './encoding.util.js'
|
||||
|
||||
export { getBOMEncoding } from './encoding.api.js'
|
||||
|
||||
export const E_ENCODING = 'Unknown encoding'
const E_MULTI = "import '@exodus/bytes/encoding.js' for legacy multi-byte encodings support"
const E_OPTIONS = 'The "options" argument must be of type object'
// U+FFFD, appended/substituted in non-fatal decode modes
const replacementChar = '\uFFFD'
// Legacy encodings that require the optional multi-byte module
const multibyteSet = new Set(['big5', 'euc-kr', 'euc-jp', 'iso-2022-jp', 'shift_jis', 'gbk', 'gb18030']) // prettier-ignore
// Populated via setMultibyte() when the multi-byte module is loaded
let createMultibyteDecoder, multibyteEncoder
|
||||
|
||||
let labelsMap
// Warning: unlike whatwg-encoding, returns lowercased labels
// Those are case-insensitive and that's how TextDecoder encoding getter normalizes them
// https://encoding.spec.whatwg.org/#names-and-labels
/**
 * Normalize an encoding label to its canonical lowercase name, or null if unknown.
 * @param {string} label
 * @returns {string|null}
 */
export function normalizeEncoding(label) {
  // fast path
  if (label === 'utf-8' || label === 'utf8' || label === 'UTF-8' || label === 'UTF8') return 'utf-8'
  if (label === 'windows-1252' || label === 'ascii' || label === 'latin1') return 'windows-1252'
  // full map
  if (/[^\w\t\n\f\r .:-]/i.test(label)) return null // must be ASCII (with ASCII whitespace)
  const low = `${label}`.trim().toLowerCase()
  // Already a canonical name?
  if (Object.hasOwn(labels, low)) return low

  // Build the alias -> canonical-name map lazily, once
  if (!labelsMap) {
    labelsMap = new Map()
    for (const [name, aliases] of Object.entries(labels)) {
      for (const alias of aliases) labelsMap.set(alias, name)
    }
  }

  const canonical = labelsMap.get(low)
  return canonical || null
}
|
||||
|
||||
// Canonical names whose first three letters are uppercased wholesale
const uppercasePrefixes = new Set(['utf', 'iso', 'koi', 'euc', 'ibm', 'gbk'])

// Unlike normalizeEncoding, case-sensitive
// https://encoding.spec.whatwg.org/#names-and-labels
/**
 * Map an encoding label to its spec-cased name (e.g. 'UTF-8', 'Shift_JIS'), or null.
 * @param {string} label
 * @returns {string|null}
 */
export function labelToName(label) {
  const enc = normalizeEncoding(label)
  if (enc === 'utf-8') return 'UTF-8' // fast path
  if (!enc) return enc // null passthrough
  if (uppercasePrefixes.has(enc.slice(0, 3))) return enc.toUpperCase()
  switch (enc) {
    case 'big5':
      return 'Big5'
    case 'shift_jis':
      return 'Shift_JIS'
    default:
      return enc
  }
}
|
||||
|
||||
// True when enc requires the optional multi-byte decoder module
export const isMultibyte = (enc) => multibyteSet.has(enc)

// Registers the legacy multi-byte codec factories (stored in the module-level slots above)
export function setMultibyte(createDecoder, createEncoder) {
  createMultibyteDecoder = createDecoder
  multibyteEncoder = createEncoder
}

// Returns the registered multi-byte encoder factory; throws if setMultibyte was never called
export function getMultibyteEncoder() {
  if (!multibyteEncoder) throw new Error(E_MULTI)
  return multibyteEncoder
}
|
||||
|
||||
// Define a non-writable own property (used below for read-only attributes like .encoding)
const define = (obj, key, value) => Object.defineProperty(obj, key, { value, writable: false })
|
||||
|
||||
/**
 * True for Uint8Array, including cross-realm instances
 * (detected via the Object.prototype.toString tag).
 * @param {*} x
 * @returns {boolean}
 */
function isAnyUint8Array(x) {
  if (x instanceof Uint8Array) return true
  // Cheap shape checks before the slower toString-tag probe
  if (!x || !ArrayBuffer.isView(x) || x.BYTES_PER_ELEMENT !== 1) return false
  const tag = Object.prototype.toString.call(x)
  return tag === '[object Uint8Array]'
}
|
||||
|
||||
/**
 * Select the decode function for a unicode encoding.
 * @param {'utf-8'|'utf-16le'|'utf-16be'} encoding
 * @param {boolean} loose - when true, use the replacing (non-fatal) decoder
 * @returns {(u: Uint8Array) => string}
 */
function unicodeDecoder(encoding, loose) {
  if (encoding === 'utf-8') return loose ? utf8toStringLoose : utf8toString // likely
  const form = encoding === 'utf-16le' ? 'uint8-le' : 'uint8-be'
  if (loose) return (u) => utf16toStringLoose(u, form)
  return (u) => utf16toString(u, form)
}
|
||||
|
||||
/**
 * WHATWG-compatible TextDecoder supporting unicode, single-byte, and
 * (when the optional module is registered) legacy multi-byte encodings.
 * https://encoding.spec.whatwg.org/#interface-textdecoder
 */
export class TextDecoder {
  #decode // cached decode function, created lazily on first decode()
  #unicode // true for utf-8 / utf-16le / utf-16be
  #multibyte // true for legacy multi-byte encodings (requires setMultibyte)
  #chunk // unfinished trailing bytes carried over between stream:true calls
  #canBOM // whether the next decoded data may start with a strippable BOM

  /**
   * @param {string} encoding - any label recognized by normalizeEncoding
   * @param {{ fatal?: boolean, ignoreBOM?: boolean }} options
   * @throws {RangeError} on unknown encodings (and 'replacement')
   */
  constructor(encoding = 'utf-8', options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const enc = normalizeEncoding(encoding)
    if (!enc || enc === 'replacement') throw new RangeError(E_ENCODING)
    // Spec attributes are read-only
    define(this, 'encoding', enc)
    define(this, 'fatal', !!options.fatal)
    define(this, 'ignoreBOM', !!options.ignoreBOM)
    this.#unicode = enc === 'utf-8' || enc === 'utf-16le' || enc === 'utf-16be'
    this.#multibyte = !this.#unicode && isMultibyte(enc)
    this.#canBOM = this.#unicode && !this.ignoreBOM
  }

  get [Symbol.toStringTag]() {
    return 'TextDecoder'
  }

  /**
   * Decode a buffer source to a string.
   * @param {*} input - any buffer source accepted by fromSource (or undefined)
   * @param {{ stream?: boolean }} options - stream:true keeps unfinished bytes for the next call
   * @returns {string}
   */
  decode(input, options = {}) {
    if (typeof options !== 'object') throw new TypeError(E_OPTIONS)
    const stream = !!options.stream
    let u = input === undefined ? new Uint8Array() : fromSource(input)
    const empty = u.length === 0 // also can't be streaming after next line
    if (empty && stream) return '' // no state change

    if (this.#unicode) {
      let prefix
      if (this.#chunk) {
        // Join the carried-over bytes from the previous stream call with the new input
        const merged = mergePrefix(u, this.#chunk, this.encoding)
        if (u.length < 3) {
          u = merged // might be unfinished, but fully consumed old u
        } else {
          prefix = merged // stops at complete chunk
          // Skip over however many new bytes the prefix consumed
          const add = prefix.length - this.#chunk.length
          if (add > 0) u = u.subarray(add)
        }

        this.#chunk = null
      } else if (empty) {
        this.#canBOM = !this.ignoreBOM // not streaming
        return ''
      }

      // For non-stream utf-8 we don't have to do this as it matches utf8toStringLoose already
      // For non-stream loose utf-16 we still have to do this as this API supports uneven byteLength unlike utf16toStringLoose
      let suffix = ''
      if (stream || (!this.fatal && this.encoding !== 'utf-8')) {
        // Count trailing bytes that don't form a complete code point / code unit yet
        const trail = unfinishedBytes(u, u.byteLength, this.encoding)
        if (trail > 0) {
          if (stream) {
            this.#chunk = Uint8Array.from(u.subarray(-trail)) // copy
          } else {
            // non-fatal mode as already checked
            suffix = replacementChar
          }

          u = u.subarray(0, -trail)
        }
      }

      // Strip the BOM from the start of the stream (only once per logical stream)
      let seenBOM = false
      if (this.#canBOM) {
        const bom = this.#findBom(prefix ?? u)
        if (bom) {
          seenBOM = true
          if (prefix) {
            prefix = prefix.subarray(bom)
          } else {
            u = u.subarray(bom)
          }
        }
      } else if (!stream && !this.ignoreBOM) {
        // Non-stream call resets the stream state: next call may strip a BOM again
        this.#canBOM = true
      }

      if (!this.#decode) this.#decode = unicodeDecoder(this.encoding, !this.fatal)
      try {
        const res = (prefix ? this.#decode(prefix) : '') + this.#decode(u) + suffix
        // "BOM seen" is set on the current decode call only if it did not error, in "serialize I/O queue" after decoding
        if (stream && (seenBOM || res.length > 0)) this.#canBOM = false
        return res
      } catch (err) {
        this.#chunk = null // reset unfinished chunk on errors
        // The correct way per spec seems to be not destroying the decoder state (aka BOM here) in stream mode
        // See also multi-byte.js
        throw err
      }

      // eslint-disable-next-line no-else-return
    } else if (this.#multibyte) {
      if (!createMultibyteDecoder) throw new Error(E_MULTI)
      if (!this.#decode) this.#decode = createMultibyteDecoder(this.encoding, !this.fatal) // can contain state!
      return this.#decode(u, stream)
    } else {
      // Single-byte legacy encodings are stateless, no streaming handling needed
      if (!this.#decode) this.#decode = createSinglebyteDecoder(this.encoding, !this.fatal)
      return this.#decode(u)
    }
  }

  // Returns the BOM length in bytes (0 when absent) for this.encoding
  #findBom(u) {
    switch (this.encoding) {
      case 'utf-8':
        return u.byteLength >= 3 && u[0] === 0xef && u[1] === 0xbb && u[2] === 0xbf ? 3 : 0
      case 'utf-16le':
        return u.byteLength >= 2 && u[0] === 0xff && u[1] === 0xfe ? 2 : 0
      case 'utf-16be':
        return u.byteLength >= 2 && u[0] === 0xfe && u[1] === 0xff ? 2 : 0
    }

    /* c8 ignore next */
    throw new Error('Unreachable')
  }
}
|
||||
|
||||
/**
 * WHATWG-compatible TextEncoder: encodes strings to UTF-8 bytes.
 * https://encoding.spec.whatwg.org/#interface-textencoder
 */
export class TextEncoder {
  constructor() {
    define(this, 'encoding', 'utf-8') // read-only, always utf-8
  }

  get [Symbol.toStringTag]() {
    return 'TextEncoder'
  }

  /**
   * Encode a string to a fresh (non-pooled) Uint8Array using the loose UTF-8 converter.
   * @param {string} str
   * @returns {Uint8Array}
   */
  encode(str = '') {
    if (typeof str !== 'string') str = `${str}`
    return utf8fromStringLoose(str) // non-pooled
  }

  /**
   * Encode as much of str as fits into target without splitting a multi-byte sequence.
   * @param {string} str
   * @param {Uint8Array} target - destination buffer
   * @returns {{ read: number, written: number }} charcodes consumed / bytes written
   */
  encodeInto(str, target) {
    if (typeof str !== 'string') str = `${str}`
    if (!isAnyUint8Array(target)) throw new TypeError('Target must be an Uint8Array')
    if (target.buffer.detached) return { read: 0, written: 0 } // Until https://github.com/whatwg/encoding/issues/324 is resolved

    const tlen = target.length
    // Each charcode takes at least one UTF-8 byte, so chars beyond tlen can never fit
    if (tlen < str.length) str = str.slice(0, tlen)
    let u8 = utf8fromStringLoose(str)
    let read
    if (tlen >= u8.length) {
      read = str.length // everything fit
    } else if (u8.length === str.length) {
      if (u8.length > tlen) u8 = u8.subarray(0, tlen) // ascii can be truncated
      read = u8.length
    } else {
      // Multi-byte output that overflows: cut at tlen, then back off any split sequence
      u8 = u8.subarray(0, tlen)
      const unfinished = unfinishedBytes(u8, u8.length, 'utf-8')
      if (unfinished > 0) u8 = u8.subarray(0, u8.length - unfinished)

      // We can do this because loose str -> u8 -> str preserves length, unlike loose u8 -> str -> u8
      // Each unpaired surrogate (1 charcode) is replaced with a single charcode
      read = utf8toStringLoose(u8).length // FIXME: Converting back is very inefficient
    }

    try {
      target.set(u8)
    } catch {
      return { read: 0, written: 0 } // see above, likely detached but no .detached property support
    }

    return { read, written: u8.length }
  }
}
|
||||
|
||||
// Shared error message for the two stream wrapper classes below
const E_NO_STREAMS = 'TransformStream global not present in the environment'
|
||||
|
||||
// https://encoding.spec.whatwg.org/#interface-textdecoderstream
// Streaming wrapper around TextDecoder: each chunk is decoded with
// { stream: true } and any remaining state is flushed at the end.
export class TextDecoderStream {
  constructor(encoding = 'utf-8', options = {}) {
    if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
    const dec = new TextDecoder(encoding, options)

    // Empty strings are never enqueued
    const enqueueNonEmpty = (controller, text) => {
      if (text) controller.enqueue(text)
    }

    const streams = new TransformStream({
      transform(chunk, controller) {
        enqueueNonEmpty(controller, dec.decode(fromSource(chunk), { stream: true }))
      },
      // https://streams.spec.whatwg.org/#dom-transformer-flush
      flush(controller) {
        enqueueNonEmpty(controller, dec.decode())
        // No need to call .terminate() (Node.js is wrong)
      },
    })

    define(this, 'encoding', dec.encoding)
    define(this, 'fatal', dec.fatal)
    define(this, 'ignoreBOM', dec.ignoreBOM)
    define(this, 'readable', streams.readable)
    define(this, 'writable', streams.writable)
  }

  get [Symbol.toStringTag]() {
    return 'TextDecoderStream'
  }
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#interface-textencoderstream
// Only UTF-8 per spec. A trailing high surrogate is carried over to the next
// chunk so a pair split across chunks still encodes as one code point; a
// leftover lead at flush time becomes U+FFFD.
export class TextEncoderStream {
  constructor() {
    if (!globalThis.TransformStream) throw new Error(E_NO_STREAMS)
    let pendingLead
    const streams = new TransformStream({
      // https://encoding.spec.whatwg.org/#encode-and-enqueue-a-chunk
      // Not identical in code, but reuses loose mode to have identical behavior
      transform(chunk, controller) {
        let text = String(chunk) // DOMString, might contain unpaired surrogates
        if (text.length === 0) return
        if (pendingLead) {
          text = pendingLead + text
          pendingLead = null
        }

        const tail = text.charCodeAt(text.length - 1) // Can't come from previous lead due to length check
        if ((tail & 0xfc_00) === 0xd8_00) {
          // Ends on a high surrogate: hold it back until the next chunk
          pendingLead = text[text.length - 1]
          text = text.slice(0, -1)
        }

        if (text) controller.enqueue(utf8fromStringLoose(text))
      },
      // https://encoding.spec.whatwg.org/#encode-and-flush
      flush(controller) {
        // A dangling lead surrogate encodes as the UTF-8 replacement character
        if (pendingLead) controller.enqueue(Uint8Array.of(0xef, 0xbf, 0xbd))
      },
    })

    define(this, 'encoding', 'utf-8')
    define(this, 'readable', streams.readable)
    define(this, 'writable', streams.writable)
  }

  get [Symbol.toStringTag]() {
    return 'TextEncoderStream'
  }
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#decode
// Warning: encoding sniffed from BOM takes preference over the supplied one
// Warning: lossy, performs replacement, no option of throwing
// Completely ignores encoding and even skips validation when BOM is found
// Unlike TextDecoder public API, additionally supports 'replacement' encoding
export function legacyHookDecode(input, fallbackEncoding = 'utf-8') {
  let u8 = fromSource(input)
  // Strip the BOM if one is present (3 bytes for utf-8, 2 for utf-16le/be)
  const bomEncoding = getBOMEncoding(u8)
  if (bomEncoding) u8 = u8.subarray(bomEncoding === 'utf-8' ? 3 : 2)
  const enc = bomEncoding ?? normalizeEncoding(fallbackEncoding) // "the byte order mark is more authoritative than anything else"

  if (enc === 'utf-8') return utf8toStringLoose(u8)
  if (enc === 'utf-16le' || enc === 'utf-16be') {
    // An odd byte length cannot decode fully: drop the unfinished tail bytes
    // and append a single replacement character for them
    let suffix = ''
    if (u8.byteLength % 2 !== 0) {
      suffix = replacementChar
      u8 = u8.subarray(0, -unfinishedBytes(u8, u8.byteLength, enc))
    }

    return utf16toStringLoose(u8, enc === 'utf-16le' ? 'uint8-le' : 'uint8-be') + suffix
  }

  if (!Object.hasOwn(labels, enc)) throw new RangeError(E_ENCODING)

  if (isMultibyte(enc)) {
    if (!createMultibyteDecoder) throw new Error(E_MULTI)
    return createMultibyteDecoder(enc, true)(u8)
  }

  // https://encoding.spec.whatwg.org/#replacement-decoder
  // On non-streaming non-fatal case, it just replaces any non-empty input with a single replacement char
  // NOTE(review): checks input.byteLength rather than u8.byteLength — equivalent
  // here because a sniffed BOM forces a utf encoding (so no BOM was stripped on
  // this path), but u8 would be the safer reference; confirm
  if (enc === 'replacement') return input.byteLength > 0 ? replacementChar : ''

  return createSinglebyteDecoder(enc, true)(u8)
}
|
||||
|
||||
// Byte-to-charcode identity decode: every byte maps to the charcode of the
// same value (latin1)
export function isomorphicDecode(input) {
  const bytes = fromSource(input)
  return latin1toString(bytes)
}
|
||||
|
||||
// Charcode-to-byte identity encode (latin1)
// NOTE(review): presumably requires all charcodes <= 0xff — confirm against
// latin1fromString's validation behavior
export function isomorphicEncode(str) {
  return latin1fromString(str)
}
|
||||
50
node_modules/@exodus/bytes/fallback/encoding.labels.js
generated
vendored
Normal file
50
node_modules/@exodus/bytes/fallback/encoding.labels.js
generated
vendored
Normal file
@@ -0,0 +1,50 @@
|
||||
// See https://encoding.spec.whatwg.org/#names-and-labels
// Maps each canonical encoding name to its extra labels (aliases). Alias
// families that follow a regular pattern (iso-8859-*, windows-125*) are
// generated by the loops below to keep the literal table small.

/* eslint-disable @exodus/export-default/named */
// prettier-ignore
const labels = {
  'utf-8': ['unicode-1-1-utf-8', 'unicode11utf8', 'unicode20utf8', 'utf8', 'x-unicode20utf8'],
  'utf-16be': ['unicodefffe'],
  'utf-16le': ['csunicode', 'iso-10646-ucs-2', 'ucs-2', 'unicode', 'unicodefeff', 'utf-16'],
  'iso-8859-2': ['iso-ir-101'],
  'iso-8859-3': ['iso-ir-109'],
  'iso-8859-4': ['iso-ir-110'],
  'iso-8859-5': ['csisolatincyrillic', 'cyrillic', 'iso-ir-144'],
  'iso-8859-6': ['arabic', 'asmo-708', 'csiso88596e', 'csiso88596i', 'csisolatinarabic', 'ecma-114', 'iso-8859-6-e', 'iso-8859-6-i', 'iso-ir-127'],
  'iso-8859-7': ['csisolatingreek', 'ecma-118', 'elot_928', 'greek', 'greek8', 'iso-ir-126', 'sun_eu_greek'],
  'iso-8859-8': ['csiso88598e', 'csisolatinhebrew', 'hebrew', 'iso-8859-8-e', 'iso-ir-138', 'visual'],
  'iso-8859-8-i': ['csiso88598i', 'logical'],
  'iso-8859-16': [],
  'koi8-r': ['cskoi8r', 'koi', 'koi8', 'koi8_r'],
  'koi8-u': ['koi8-ru'],
  'windows-874': ['dos-874', 'iso-8859-11', 'iso8859-11', 'iso885911', 'tis-620'],
  ibm866: ['866', 'cp866', 'csibm866'],
  'x-mac-cyrillic': ['x-mac-ukrainian'],
  macintosh: ['csmacintosh', 'mac', 'x-mac-roman'],
  gbk: ['chinese', 'csgb2312', 'csiso58gb231280', 'gb2312', 'gb_2312', 'gb_2312-80', 'iso-ir-58', 'x-gbk'],
  gb18030: [],
  big5: ['big5-hkscs', 'cn-big5', 'csbig5', 'x-x-big5'],
  'euc-jp': ['cseucpkdfmtjapanese', 'x-euc-jp'],
  shift_jis: ['csshiftjis', 'ms932', 'ms_kanji', 'shift-jis', 'sjis', 'windows-31j', 'x-sjis'],
  'euc-kr': ['cseuckr', 'csksc56011987', 'iso-ir-149', 'korean', 'ks_c_5601-1987', 'ks_c_5601-1989', 'ksc5601', 'ksc_5601', 'windows-949'],
  'iso-2022-jp': ['csiso2022jp'],
  replacement: ['csiso2022kr', 'hz-gb-2312', 'iso-2022-cn', 'iso-2022-cn-ext', 'iso-2022-kr'],
  'x-user-defined': [],
}

// Generated alias families
for (const i of [10, 13, 14, 15]) labels[`iso-8859-${i}`] = [`iso8859-${i}`, `iso8859${i}`]
for (const i of [2, 6, 7]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1987`)
for (const i of [3, 4, 5, 8]) labels[`iso-8859-${i}`].push(`iso_8859-${i}:1988`)
// prettier-ignore
for (let i = 2; i < 9; i++) labels[`iso-8859-${i}`].push(`iso8859-${i}`, `iso8859${i}`, `iso_8859-${i}`)
for (let i = 2; i < 5; i++) labels[`iso-8859-${i}`].push(`csisolatin${i}`, `l${i}`, `latin${i}`)
for (let i = 0; i < 9; i++) labels[`windows-125${i}`] = [`cp125${i}`, `x-cp125${i}`]

// Per the spec table, windows-1252 also absorbs the latin1/ascii labels and
// windows-1254 the latin5/iso-8859-9 labels
// prettier-ignore
labels['windows-1252'].push('ansi_x3.4-1968', 'ascii', 'cp819', 'csisolatin1', 'ibm819', 'iso-8859-1', 'iso-ir-100', 'iso8859-1', 'iso88591', 'iso_8859-1', 'iso_8859-1:1987', 'l1', 'latin1', 'us-ascii')
// prettier-ignore
labels['windows-1254'].push('csisolatin5', 'iso-8859-9', 'iso-ir-148', 'iso8859-9', 'iso88599', 'iso_8859-9', 'iso_8859-9:1989', 'l5', 'latin5')
labels['iso-8859-10'].push('csisolatin6', 'iso-ir-157', 'l6', 'latin6')
labels['iso-8859-15'].push('csisolatin9', 'iso_8859-15', 'l9')

export default labels
|
||||
65
node_modules/@exodus/bytes/fallback/encoding.util.js
generated
vendored
Normal file
65
node_modules/@exodus/bytes/fallback/encoding.util.js
generated
vendored
Normal file
@@ -0,0 +1,65 @@
|
||||
// Get the number of last bytes in Uint8Array `u`, ending at `len`, that do not
// form a complete codepoint yet but could become one given more data (0-3).
// Returns 0 when the tail is either complete or already invalid.
export function unfinishedBytes(u, len, enc) {
  if (enc === 'utf-8') {
    // Walk back over up to two continuation bytes (10xxxxxx)
    let trailing = 0
    while (trailing < 2 && trailing < len && (u[len - trailing - 1] & 0xc0) === 0x80) trailing += 1
    if (trailing === len) return 0 // no space left for a lead byte
    const lead = u[len - trailing - 1]
    if (lead < 0xc2 || lead > 0xf4) return 0 // not a valid lead
    if (trailing === 0) return 1 // a bare lead; 2-byte sequences always end here
    if (lead < 0xe0) return 0 // 2-byte lead already has its continuation
    if (lead < 0xf0 && trailing >= 2) return 0 // 3-byte sequence is complete
    // The first continuation byte has a lead-dependent valid range
    let lo = 0x80
    let hi = 0xbf
    if (lead === 0xe0) lo = 0xa0
    else if (lead === 0xf0) lo = 0x90
    if (lead === 0xed) hi = 0x9f
    else if (lead === 0xf4) hi = 0x8f
    const first = u[len - trailing]
    return first >= lo && first <= hi ? trailing + 1 : 0
  }

  if (enc === 'utf-16le' || enc === 'utf-16be') {
    const odd = len % 2 // a dangling half of a code unit adds 1
    if (len < 2) return odd
    const top = len - odd - 1
    const unit = enc === 'utf-16le' ? (u[top] << 8) ^ u[top - 1] : (u[top - 1] << 8) ^ u[top]
    return unit >= 0xd8_00 && unit < 0xdc_00 ? odd + 2 : odd // a lone lead surrogate adds 2
  }

  throw new Error('Unsupported encoding')
}
|
||||
|
||||
// Merge prefix `chunk` with `u` and return new combined prefix
// For u.length < 3, fully consumes u and can return unfinished data,
// otherwise returns a prefix with no unfinished bytes
export function mergePrefix(u, chunk, enc) {
  if (u.length === 0) return chunk
  const cl = chunk.length
  if (u.length < 3) {
    // No reason to bruteforce offsets, also it's possible this doesn't yet end the sequence
    const a = new Uint8Array(cl + u.length)
    a.set(chunk)
    a.set(u, cl)
    return a
  }

  // Slice off a small portion of u into prefix chunk so we can decode them separately without extending array size
  const t = new Uint8Array(cl + 3) // We have 1-3 bytes and need 1-3 more bytes
  t.set(chunk)
  t.set(u.subarray(0, 3), cl)

  // Stop at the first offset where unfinished bytes reaches 0 or fits into u
  // If that doesn't happen (u too short), just concat chunk and u completely (above)
  for (let i = 1; i <= 3; i++) {
    const unfinished = unfinishedBytes(t, cl + i, enc) // 0-3
    if (unfinished <= i) {
      // Always reachable at 3, but we still need 'unfinished' value for it
      // `add` is how many of the borrowed bytes stay in the merged prefix
      const add = i - unfinished // 0-3
      return add > 0 ? t.subarray(0, cl + add) : chunk
    }
  }

  // Unreachable
}
|
||||
126
node_modules/@exodus/bytes/fallback/hex.js
generated
vendored
Normal file
126
node_modules/@exodus/bytes/fallback/hex.js
generated
vendored
Normal file
@@ -0,0 +1,126 @@
|
||||
import { E_STRING } from './_utils.js'
|
||||
import { nativeDecoder, nativeEncoder, decode2string } from './platform.js'
|
||||
import { encodeAscii, decodeAscii } from './latin1.js'
|
||||
|
||||
// Lazily-built lookup tables shared by toHex/fromHex below
let hexArray // array of 256 bytes converted to two-char hex strings
let hexCodes // hexArray converted to u16 code pairs
let dehexArray // reverse lookup; layout depends on which fromHex path built it
const _00 = 0x30_30 // '00' string in hex, the only allowed char pair to generate 0 byte
const _ff = 0x66_66 // 'ff' string in hex, max allowed char pair (larger than 'FF' string)
const allowed = '0123456789ABCDEFabcdef'

export const E_HEX = 'Input is not a hex string'
|
||||
|
||||
// Converts bytes to a lowercase hex string. Expects a checked Uint8Array
export function toHex(arr) {
  // Lazily build the byte -> 'xx' two-char hex string table
  if (!hexArray) hexArray = Array.from({ length: 256 }, (_, i) => i.toString(16).padStart(2, '0'))
  const length = arr.length // this helps Hermes

  // Only old browsers use this, barebone engines don't have TextDecoder
  // But Hermes can use this when it (hopefully) implements TextDecoder
  if (nativeDecoder) {
    if (!hexCodes) {
      // Build byte -> u16 table where each u16 packs both hex charcodes; it is
      // written through a u8 view of the same buffer, so native byte order
      // round-trips when read back the same way below
      hexCodes = new Uint16Array(256)
      const u8 = new Uint8Array(hexCodes.buffer, hexCodes.byteOffset, hexCodes.byteLength)
      for (let i = 0; i < 256; i++) {
        const pair = hexArray[i]
        u8[2 * i] = pair.charCodeAt(0)
        u8[2 * i + 1] = pair.charCodeAt(1)
      }
    }

    // Map each byte to its packed charcode pair, unrolled 4 at a time
    const oa = new Uint16Array(length)
    let i = 0
    for (const last3 = arr.length - 3; ; i += 4) {
      if (i >= last3) break // loop is fast enough for moving this here to be useful on JSC
      const x0 = arr[i]
      const x1 = arr[i + 1]
      const x2 = arr[i + 2]
      const x3 = arr[i + 3]
      oa[i] = hexCodes[x0]
      oa[i + 1] = hexCodes[x1]
      oa[i + 2] = hexCodes[x2]
      oa[i + 3] = hexCodes[x3]
    }

    // Tail: remaining 0-3 bytes
    for (; i < length; i++) oa[i] = hexCodes[arr[i]]
    return decodeAscii(oa)
  }

  return decode2string(arr, 0, length, hexArray)
}
|
||||
|
||||
// Parses a hex string (mixed case allowed) into a fresh Uint8Array.
// Throws TypeError on non-strings and SyntaxError(E_HEX) on invalid hex
export function fromHex(str) {
  if (typeof str !== 'string') throw new TypeError(E_STRING)
  if (str.length % 2 !== 0) throw new SyntaxError(E_HEX)

  const length = str.length / 2 // this helps Hermes in loops
  const arr = new Uint8Array(length)

  // Native encoder path is beneficial even for small arrays in Hermes
  if (nativeEncoder) {
    if (!dehexArray) {
      // Table keyed by the u16 that holds BOTH charcodes of a hex pair; 0 marks
      // an invalid pair, which is why valid zero ('00') needs the _00 recheck
      dehexArray = new Uint8Array(_ff + 1) // 26 KiB cache, >2x perf improvement on Hermes
      const u8 = new Uint8Array(2)
      const u16 = new Uint16Array(u8.buffer, u8.byteOffset, 1) // for endianess-agnostic transform
      const map = [...allowed].map((c) => [c.charCodeAt(0), parseInt(c, 16)])
      for (const [ch, vh] of map) {
        u8[0] = ch // first we read high hex char
        for (const [cl, vl] of map) {
          u8[1] = cl // then we read low hex char
          dehexArray[u16[0]] = (vh << 4) | vl
        }
      }
    }

    // Reinterpret the ascii charcodes as u16 char pairs and look each pair up,
    // unrolled 4 at a time
    const codes = encodeAscii(str, E_HEX)
    const codes16 = new Uint16Array(codes.buffer, codes.byteOffset, codes.byteLength / 2)
    let i = 0
    for (const last3 = length - 3; i < last3; i += 4) {
      const ai = codes16[i]
      const bi = codes16[i + 1]
      const ci = codes16[i + 2]
      const di = codes16[i + 3]
      const a = dehexArray[ai]
      const b = dehexArray[bi]
      const c = dehexArray[ci]
      const d = dehexArray[di]
      // A 0 value means either the valid pair '00' or an invalid pair
      if ((!a && ai !== _00) || (!b && bi !== _00) || (!c && ci !== _00) || (!d && di !== _00)) {
        throw new SyntaxError(E_HEX)
      }

      arr[i] = a
      arr[i + 1] = b
      arr[i + 2] = c
      arr[i + 3] = d
    }

    // Tail: remaining 0-3 pairs
    while (i < length) {
      const ai = codes16[i]
      const a = dehexArray[ai]
      if (!a && ai !== _00) throw new SyntaxError(E_HEX)
      arr[i++] = a
    }
  } else {
    if (!dehexArray) {
      // no regex input validation here, so we map all other bytes to -1 and recheck sign
      // non-ASCII chars throw already though, so we should process only 0-127
      dehexArray = new Int8Array(128).fill(-1)
      for (let i = 0; i < 16; i++) {
        const s = i.toString(16)
        dehexArray[s.charCodeAt(0)] = dehexArray[s.toUpperCase().charCodeAt(0)] = i
      }
    }

    let j = 0
    for (let i = 0; i < length; i++) {
      const a = str.charCodeAt(j++)
      const b = str.charCodeAt(j++)
      // Any -1 nibble makes the result negative; charcodes >= 128 index past the
      // table and need the explicit 0x7f mask check
      const res = (dehexArray[a] << 4) | dehexArray[b]
      if (res < 0 || (0x7f | a | b) !== 0x7f) throw new SyntaxError(E_HEX) // 0-127
      arr[i] = res
    }
  }

  return arr
}
|
||||
151
node_modules/@exodus/bytes/fallback/latin1.js
generated
vendored
Normal file
151
node_modules/@exodus/bytes/fallback/latin1.js
generated
vendored
Normal file
@@ -0,0 +1,151 @@
|
||||
import {
|
||||
nativeEncoder,
|
||||
nativeDecoder,
|
||||
nativeDecoderLatin1,
|
||||
nativeBuffer,
|
||||
encodeCharcodes,
|
||||
isHermes,
|
||||
isDeno,
|
||||
isLE,
|
||||
} from './platform.js'
|
||||
|
||||
// Globals captured once at module load time
const atob = /* @__PURE__ */ (() => globalThis.atob)()
const web64 = /* @__PURE__ */ (() => Uint8Array.prototype.toBase64)()

// See http://stackoverflow.com/a/22747272/680742, which says that lowest limit is in Chrome, with 0xffff args
// On Hermes, actual max is 0x20_000 minus current stack depth, 1/16 of that should be safe
const maxFunctionArgs = 0x20_00

// toBase64+atob path is faster on everything where fromBase64 is fast
const useLatin1atob = web64 && atob
|
||||
|
||||
// Returns the number of leading ASCII (< 0x80) bytes in `arr`, i.e. the index
// of the first non-ASCII byte, or arr.length if all bytes are ASCII
export function asciiPrefix(arr) {
  let p = 0 // verified ascii bytes
  const length = arr.length
  // Threshold tested on Hermes (worse on <=48, better on >=52)
  // Also on v8 arrs of size <=64 might be on heap and using Uint32Array on them is unoptimal
  if (length > 64) {
    // Speedup with u32: scan 4 bytes per word for the 0x80 bit
    const u32start = (4 - (arr.byteOffset & 3)) % 4 // offset start by this many bytes for alignment
    for (; p < u32start; p++) if (arr[p] >= 0x80) return p
    const u32length = ((arr.byteLength - u32start) / 4) | 0
    const u32 = new Uint32Array(arr.buffer, arr.byteOffset + u32start, u32length)
    let i = 0
    // Unrolled 4 words (16 bytes) per iteration; breaking leaves p/i at the
    // start of the offending group for the word-wise rescan below
    for (const last3 = u32length - 3; ; p += 16, i += 4) {
      if (i >= last3) break // loop is fast enough for moving this here to be _very_ useful, likely due to array access checks
      const a = u32[i]
      const b = u32[i + 1]
      const c = u32[i + 2]
      const d = u32[i + 3]
      // "(a | b | c | d) & mask" is slower on Hermes though faster on v8
      if (a & 0x80_80_80_80 || b & 0x80_80_80_80 || c & 0x80_80_80_80 || d & 0x80_80_80_80) break
    }

    // One word at a time; breaking leaves p at the start of the offending word
    for (; i < u32length; p += 4, i++) if (u32[i] & 0x80_80_80_80) break
  }

  // Byte-wise: the tail, plus a rescan of the word that had a high bit set
  for (; p < length; p++) if (arr[p] >= 0x80) return p
  return length
}
|
||||
|
||||
// Capable of decoding Uint16Array to UTF-16 as well as Uint8Array to Latin-1
export function decodeLatin1(arr, start = 0, stop = arr.length) {
  start |= 0
  stop |= 0
  const total = stop - start
  if (total === 0) return ''

  // Fast path: toBase64 + atob round-trips bytes to a latin1 string
  if (
    useLatin1atob &&
    total >= 256 &&
    total < 1e8 &&
    arr.toBase64 === web64 && // guard against patched/inherited toBase64
    arr.BYTES_PER_ELEMENT === 1
  ) {
    const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
    return atob(sliced.toBase64())
  }

  // String.fromCharCode.apply is limited by the engine's max argument count,
  // so long inputs are converted in chunks and concatenated
  if (total > maxFunctionArgs) {
    let prefix = ''
    for (let i = start; i < stop; ) {
      const i1 = Math.min(stop, i + maxFunctionArgs)
      prefix += String.fromCharCode.apply(String, arr.subarray(i, i1))
      i = i1
    }

    return prefix
  }

  const sliced = start === 0 && stop === arr.length ? arr : arr.subarray(start, stop)
  return String.fromCharCode.apply(String, sliced)
}
|
||||
|
||||
// Unchecked for well-formedness, raw. Expects Uint16Array input
// Uses Buffer's native ucs2 decoding on LE Node.js, otherwise charcode-based decodeLatin1
export const decodeUCS2 =
  nativeBuffer && isLE && !isDeno
    ? (u16, stop = u16.length) => {
        // TODO: fast path for BE, perhaps faster path for Deno. Note that decoder replaces, this function doesn't
        if (stop > 32) return nativeBuffer.from(u16.buffer, u16.byteOffset, stop * 2).ucs2Slice() // from 64 bytes, below are in heap
        return decodeLatin1(u16, 0, stop)
      }
    : (u16, stop = u16.length) => decodeLatin1(u16, 0, stop)
|
||||
|
||||
// Decodes known-ASCII content (Uint8Array or Uint16Array of charcodes) to a string
// Does not check input, uses best available method
// Building an array for this is only faster than proper string concatenation when TextDecoder or native Buffer are available
export const decodeAscii = nativeBuffer
  ? (a) =>
      // Buffer is faster on Node.js (but only for long enough data), if we know that output is ascii
      a.byteLength >= 0x3_00 && !isDeno
        ? nativeBuffer.from(a.buffer, a.byteOffset, a.byteLength).latin1Slice(0, a.byteLength) // .latin1Slice is faster than .asciiSlice
        : nativeDecoder.decode(a) // On Node.js, utf8 decoder is faster than latin1
  : nativeDecoderLatin1
    ? (a) => nativeDecoderLatin1.decode(a) // On browsers (specifically WebKit), latin1 decoder is faster than utf8
    : (a) =>
        decodeLatin1(
          a instanceof Uint8Array ? a : new Uint8Array(a.buffer, a.byteOffset, a.byteLength)
        )
|
||||
|
||||
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
||||
|
||||
// Copies the leading ASCII charcodes of `s` into `x`, four at a time.
// Stops at the first 4-char group containing a non-ASCII charcode (or when
// fewer than 4 chars remain) and returns the count of chars written —
// always a multiple of 4.
export function encodeAsciiPrefix(x, s) {
  let pos = 0
  const stop = s.length - 3 // computed once; only complete groups are handled
  while (pos < stop) {
    const a = s.charCodeAt(pos)
    const b = s.charCodeAt(pos + 1)
    const c = s.charCodeAt(pos + 2)
    const d = s.charCodeAt(pos + 3)
    if ((a | b | c | d) >= 128) break
    x[pos] = a
    x[pos + 1] = b
    x[pos + 2] = c
    x[pos + 3] = d
    pos += 4
  }

  return pos
}
|
||||
|
||||
/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
|
||||
|
||||
// Warning: only for already-checked strings — charcodes are narrowed to 8 bits
export const encodeLatin1 = (str) => {
  const out = new Uint8Array(str.length)
  return encodeCharcodes(str, out)
}
|
||||
|
||||
// Expects nativeEncoder to be present
const useEncodeInto = /* @__PURE__ */ (() => isHermes && nativeEncoder?.encodeInto)()
// Encodes an ASCII-only string to bytes, throwing SyntaxError(ERR) when any
// charcode is >= 128 (detected via a read/written or length mismatch).
// Strategy is picked once at module load time
export const encodeAscii = useEncodeInto
  ? (str, ERR) => {
      // Much faster in Hermes
      const codes = new Uint8Array(str.length + 4) // overshoot by a full utf8 char
      const info = nativeEncoder.encodeInto(str, codes)
      if (info.read !== str.length || info.written !== str.length) throw new SyntaxError(ERR) // non-ascii
      return codes.subarray(0, str.length)
    }
  : nativeBuffer
    ? (str, ERR) => {
        // TextEncoder is slow on Node.js 24 / 25 (was ok on 22)
        const codes = nativeBuffer.from(str, 'utf8') // ascii/latin1 coerces, we need to check
        if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
        return new Uint8Array(codes.buffer, codes.byteOffset, codes.byteLength)
      }
    : (str, ERR) => {
        const codes = nativeEncoder.encode(str)
        if (codes.length !== str.length) throw new SyntaxError(ERR) // non-ascii
        return codes
      }
|
||||
1
node_modules/@exodus/bytes/fallback/multi-byte.encodings.cjs
generated
vendored
Normal file
1
node_modules/@exodus/bytes/fallback/multi-byte.encodings.cjs
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
// Lazy accessor: defers loading the large JSON table until first call
module.exports = () => require('./multi-byte.encodings.json') // lazy-load
|
||||
546
node_modules/@exodus/bytes/fallback/multi-byte.encodings.json
generated
vendored
Normal file
546
node_modules/@exodus/bytes/fallback/multi-byte.encodings.json
generated
vendored
Normal file
File diff suppressed because one or more lines are too long
962
node_modules/@exodus/bytes/fallback/multi-byte.js
generated
vendored
Normal file
962
node_modules/@exodus/bytes/fallback/multi-byte.js
generated
vendored
Normal file
@@ -0,0 +1,962 @@
|
||||
import { E_STRING } from './_utils.js'
|
||||
import { nativeEncoder } from './platform.js'
|
||||
import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js'
|
||||
import { getTable } from './multi-byte.table.js'
|
||||
|
||||
export const E_STRICT = 'Input is not well-formed for this encoding'
|
||||
|
||||
/* Decoders */
|
||||
|
||||
// If the decoder is not cleared properly, state can be preserved between non-streaming calls!
|
||||
// See comment about fatal stream
|
||||
|
||||
// All except iso-2022-jp are ASCII supersets
|
||||
// When adding something that is not an ASCII superset, ajust the ASCII fast path
|
||||
const mappers = {
|
||||
  // https://encoding.spec.whatwg.org/#euc-kr-decoder
  // Factory for a stateful euc-kr decoder; `err()` is called once per error
  // and returns the charcode to emit (or throws, in fatal mode)
  'euc-kr': (err) => {
    const euc = getTable('euc-kr')
    let lead = 0 // pending lead byte carried across streaming decode() calls
    let oi = 0 // write index into o16
    let o16 // UTF-16 output scratch, reallocated per decode() call

    // Consumes the trailing byte `b` for the stored lead; emits 1-2 charcodes
    const decodeLead = (b) => {
      if (b < 0x41 || b > 0xfe) {
        lead = 0
        o16[oi++] = err()
        if (b < 128) o16[oi++] = b // an ascii byte is not consumed by the failed pair
      } else {
        const p = euc[(lead - 0x81) * 190 + b - 0x41]
        lead = 0
        if (p) {
          o16[oi++] = p
        } else {
          o16[oi++] = err()
          if (b < 128) o16[oi++] = b
        }
      }
    }

    const decode = (arr, start, end, stream) => {
      let i = start
      o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
      oi = 0

      // Fast path: valid ascii bytes and valid pairs only, no error handling
      if (!lead) {
        for (const last1 = end - 1; i < last1; ) {
          const l = arr[i]
          if (l < 128) {
            o16[oi++] = l
            i++
          } else {
            if (l === 0x80 || l === 0xff) break
            const b = arr[i + 1]
            if (b < 0x41 || b === 0xff) break
            const p = euc[(l - 0x81) * 190 + b - 0x41]
            if (!p) break
            o16[oi++] = p
            i += 2
          }
        }
      }

      // Slow path: handles errors and the lead left over from a previous chunk
      if (lead && i < end) decodeLead(arr[i++])
      while (i < end) {
        const b = arr[i++]
        if (b < 128) {
          o16[oi++] = b
        } else if (b === 0x80 || b === 0xff) {
          o16[oi++] = err() // never a valid lead byte
        } else {
          lead = b
          if (i < end) decodeLead(arr[i++])
        }
      }

      // EOF with a dangling lead is an error unless more data is streaming in
      if (lead && !stream) {
        lead = 0
        o16[oi++] = err()
      }

      const res = decodeUCS2(o16, oi)
      o16 = null // allow GC of the scratch buffer between calls
      return res
    }

    return { decode, isAscii: () => lead === 0 }
  },
|
||||
  // https://encoding.spec.whatwg.org/#euc-jp-decoder
  // Factory for a stateful euc-jp decoder; `err()` is called once per error
  // and returns the charcode to emit (or throws, in fatal mode)
  'euc-jp': (err) => {
    const jis0208 = getTable('jis0208')
    const jis0212 = getTable('jis0212')
    let j12 = false // inside a 3-byte 0x8f (jis0212) sequence
    let lead = 0 // pending lead byte carried across streaming decode() calls
    let oi = 0 // write index into o16
    let o16 // UTF-16 output scratch, reallocated per decode() call

    // Consumes byte `b` following the stored lead; emits 0-2 charcodes
    const decodeLead = (b) => {
      if (lead === 0x8e && b >= 0xa1 && b <= 0xdf) {
        // Half-width katakana range
        lead = 0
        o16[oi++] = 0xfe_c0 + b
      } else if (lead === 0x8f && b >= 0xa1 && b <= 0xfe) {
        // 3-byte jis0212 sequence: this byte becomes the new lead
        j12 = true
        lead = b
      } else {
        let cp
        if (lead >= 0xa1 && lead <= 0xfe && b >= 0xa1 && b <= 0xfe) {
          cp = (j12 ? jis0212 : jis0208)[(lead - 0xa1) * 94 + b - 0xa1]
        }

        lead = 0
        j12 = false
        if (cp) {
          o16[oi++] = cp
        } else {
          o16[oi++] = err()
          if (b < 128) o16[oi++] = b // an ascii byte is not consumed by the failed pair
        }
      }
    }

    const decode = (arr, start, end, stream) => {
      let i = start
      o16 = new Uint16Array(end - start + (lead ? 1 : 0))
      oi = 0

      // Fast path, non-j12
      // lead = 0 means j12 = 0
      if (!lead) {
        for (const last1 = end - 1; i < last1; ) {
          const l = arr[i]
          if (l < 128) {
            o16[oi++] = l
            i++
          } else {
            const b = arr[i + 1]
            if (l === 0x8e && b >= 0xa1 && b <= 0xdf) {
              o16[oi++] = 0xfe_c0 + b
              i += 2
            } else {
              if (l < 0xa1 || l === 0xff || b < 0xa1 || b === 0xff) break
              const cp = jis0208[(l - 0xa1) * 94 + b - 0xa1]
              if (!cp) break
              o16[oi++] = cp
              i += 2
            }
          }
        }
      }

      // Slow path: handles errors and leads carried over from a previous chunk
      if (lead && i < end) decodeLead(arr[i++])
      if (lead && i < end) decodeLead(arr[i++]) // could be two leads, but no more
      while (i < end) {
        const b = arr[i++]
        if (b < 128) {
          o16[oi++] = b
        } else if ((b < 0xa1 && b !== 0x8e && b !== 0x8f) || b === 0xff) {
          o16[oi++] = err() // never a valid lead byte
        } else {
          lead = b
          if (i < end) decodeLead(arr[i++])
          if (lead && i < end) decodeLead(arr[i++]) // could be two leads
        }
      }

      // EOF with a dangling lead is an error unless more data is streaming in
      if (lead && !stream) {
        lead = 0
        j12 = false // can be true only when lead is non-zero
        o16[oi++] = err()
      }

      const res = decodeUCS2(o16, oi)
      o16 = null // allow GC of the scratch buffer between calls
      return res
    }

    return { decode, isAscii: () => lead === 0 } // j12 can be true only when lead is non-zero
  },
|
||||
  // https://encoding.spec.whatwg.org/#iso-2022-jp-decoder
  // Escape-sequence driven state machine
  'iso-2022-jp': (err) => {
    const jis0208 = getTable('jis0208')
    // States: 1 = ASCII, 2 = Roman, 3 = Katakana, 4 = lead byte, 5 = trail byte,
    // 6 = escape start, 7 = escape
    let dState = 1 // decoder state
    let oState = 1 // output state, restored after (failed) escape sequences
    let lead = 0 // 0 or 0x21-0x7e
    let out = false // "output" flag per spec, set right after a charset switch

    // Processes one byte; returns a charcode to emit or undefined for none.
    // Bytes that must be re-processed are pushed onto `pushback` (popped LIFO)
    const bytes = (pushback, b) => {
      if (dState < 5 && b === 0x1b) {
        dState = 6 // escape start
        return
      }

      switch (dState) {
        case 1:
        case 2:
          // ASCII, Roman (common)
          out = false
          if (dState === 2) {
            // Roman differs from ASCII in exactly two codepoints
            if (b === 0x5c) return 0xa5
            if (b === 0x7e) return 0x20_3e
          }

          if (b <= 0x7f && b !== 0x0e && b !== 0x0f) return b // 0x0e/0x0f (SO/SI) are errors
          return err()
        case 3:
          // Katakana
          out = false
          if (b >= 0x21 && b <= 0x5f) return 0xff_40 + b
          return err()
        case 4:
          // Leading byte
          out = false
          if (b < 0x21 || b > 0x7e) return err()
          lead = b
          dState = 5
          return
        case 5:
          // Trailing byte
          out = false
          if (b === 0x1b) {
            dState = 6 // escape start
            return err() // the dangling lead is an error
          }

          dState = 4
          if (b >= 0x21 && b <= 0x7e) {
            const cp = jis0208[(lead - 0x21) * 94 + b - 0x21]
            if (cp) return cp
          }

          return err()
        case 6:
          // Escape start
          if (b === 0x24 || b === 0x28) {
            lead = b
            dState = 7
            return
          }

          // Not an escape sequence: restore state and re-process this byte
          out = false
          dState = oState
          pushback.push(b)
          return err()
        case 7: {
          // Escape
          const l = lead
          lead = 0
          let s
          if (l === 0x28) {
            // eslint-disable-next-line unicorn/prefer-switch
            if (b === 0x42) {
              s = 1 // ESC ( B -> ASCII
            } else if (b === 0x4a) {
              s = 2 // ESC ( J -> Roman
            } else if (b === 0x49) {
              s = 3 // ESC ( I -> Katakana
            }
          } else if (l === 0x24 && (b === 0x40 || b === 0x42)) {
            s = 4 // ESC $ @ / ESC $ B -> jis0208 lead byte
          }

          if (s) {
            dState = oState = s
            // Two charset switches in a row without output in between is an error
            const output = out
            out = true
            return output ? err() : undefined
          }

          // Unknown escape: restore state and re-process both bytes (LIFO order)
          out = false
          dState = oState
          pushback.push(b, l)
          return err()
        }
      }
    }

    // EOF handler; returns null for a clean end, or an error charcode to emit
    const eof = (pushback) => {
      if (dState < 5) return null
      out = false
      switch (dState) {
        case 5:
          // Dangling lead byte
          dState = 4
          return err()
        case 6:
          // Dangling escape start
          dState = oState
          return err()
        case 7: {
          // Dangling escape: re-process the stored intermediate byte
          dState = oState
          pushback.push(lead)
          lead = 0
          return err()
        }
      }
    }

    const decode = (arr, start, end, stream) => {
      const o16 = new Uint16Array(end - start + 2) // err in eof + lead from state
      let oi = 0
      let i = start
      const pushback = [] // local and auto-cleared

      // First, dump everything until EOF
      // Same as the full loop, but without EOF handling
      while (i < end || pushback.length > 0) {
        const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
        if (c !== undefined) o16[oi++] = c // 16-bit
      }

      // Then, dump EOF. This needs the same loop as the characters can be pushed back
      if (!stream) {
        while (i <= end || pushback.length > 0) {
          if (i < end || pushback.length > 0) {
            const c = bytes(pushback, pushback.length > 0 ? pushback.pop() : arr[i++])
            if (c !== undefined) o16[oi++] = c // 16-bit
          } else {
            const c = eof(pushback)
            if (c === null) break // clean exit
            o16[oi++] = c
          }
        }
      }

      // Chrome and WebKit fail on this, we don't: completely destroy the old decoder state when finished streaming
      // > If this's do not flush is false, then set this's decoder to a new instance of this's encoding's decoder,
      // > Set this's do not flush to options["stream"]
      if (!stream) {
        dState = oState = 1
        lead = 0
        out = false
      }

      return decodeUCS2(o16, oi)
    }

    return { decode, isAscii: () => false } // stateful: never eligible for the ascii fast path
  },
|
||||
// https://encoding.spec.whatwg.org/#shift_jis-decoder
|
||||
shift_jis: (err) => {
|
||||
const jis0208 = getTable('jis0208')
|
||||
let lead = 0
|
||||
let oi = 0
|
||||
let o16
|
||||
|
||||
const decodeLead = (b) => {
|
||||
const l = lead
|
||||
lead = 0
|
||||
if (b >= 0x40 && b <= 0xfc && b !== 0x7f) {
|
||||
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
|
||||
if (p >= 8836 && p <= 10_715) {
|
||||
o16[oi++] = 0xe0_00 - 8836 + p
|
||||
return
|
||||
}
|
||||
|
||||
const cp = jis0208[p]
|
||||
if (cp) {
|
||||
o16[oi++] = cp
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
o16[oi++] = err()
|
||||
if (b < 128) o16[oi++] = b
|
||||
}
|
||||
|
||||
const decode = (arr, start, end, stream) => {
|
||||
o16 = new Uint16Array(end - start + (lead ? 1 : 0))
|
||||
oi = 0
|
||||
let i = start
|
||||
|
||||
// Fast path
|
||||
if (!lead) {
|
||||
for (const last1 = end - 1; i < last1; ) {
|
||||
const l = arr[i]
|
||||
if (l <= 0x80) {
|
||||
o16[oi++] = l
|
||||
i++
|
||||
} else if (l >= 0xa1 && l <= 0xdf) {
|
||||
o16[oi++] = 0xfe_c0 + l
|
||||
i++
|
||||
} else {
|
||||
if (l === 0xa0 || l > 0xfc) break
|
||||
const b = arr[i + 1]
|
||||
if (b < 0x40 || b > 0xfc || b === 0x7f) break
|
||||
const p = (l - (l < 0xa0 ? 0x81 : 0xc1)) * 188 + b - (b < 0x7f ? 0x40 : 0x41)
|
||||
if (p >= 8836 && p <= 10_715) {
|
||||
o16[oi++] = 0xe0_00 - 8836 + p
|
||||
i += 2
|
||||
} else {
|
||||
const cp = jis0208[p]
|
||||
if (!cp) break
|
||||
o16[oi++] = cp
|
||||
i += 2
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lead && i < end) decodeLead(arr[i++])
|
||||
while (i < end) {
|
||||
const b = arr[i++]
|
||||
if (b <= 0x80) {
|
||||
o16[oi++] = b // 0x80 is allowed
|
||||
} else if (b >= 0xa1 && b <= 0xdf) {
|
||||
o16[oi++] = 0xfe_c0 + b
|
||||
} else if (b === 0xa0 || b > 0xfc) {
|
||||
o16[oi++] = err()
|
||||
} else {
|
||||
lead = b
|
||||
if (i < end) decodeLead(arr[i++])
|
||||
}
|
||||
}
|
||||
|
||||
if (lead && !stream) {
|
||||
lead = 0
|
||||
o16[oi++] = err()
|
||||
}
|
||||
|
||||
const res = decodeUCS2(o16, oi)
|
||||
o16 = null
|
||||
return res
|
||||
}
|
||||
|
||||
return { decode, isAscii: () => lead === 0 }
|
||||
},
|
||||
// https://encoding.spec.whatwg.org/#gbk-decoder
|
||||
gbk: (err) => mappers.gb18030(err), // 10.1.1. GBK’s decoder is gb18030’s decoder
|
||||
// https://encoding.spec.whatwg.org/#gb18030-decoder
|
||||
gb18030: (err) => {
|
||||
const gb18030 = getTable('gb18030')
|
||||
const gb18030r = getTable('gb18030-ranges')
|
||||
let g1 = 0, g2 = 0, g3 = 0 // prettier-ignore
|
||||
const index = (p) => {
|
||||
if ((p > 39_419 && p < 189_000) || p > 1_237_575) return
|
||||
if (p === 7457) return 0xe7_c7
|
||||
let a = 0, b = 0 // prettier-ignore
|
||||
for (const [c, d] of gb18030r) {
|
||||
if (c > p) break
|
||||
a = c
|
||||
b = d
|
||||
}
|
||||
|
||||
return b + p - a
|
||||
}
|
||||
|
||||
// g1 is 0 or 0x81-0xfe
|
||||
// g2 is 0 or 0x30-0x39
|
||||
// g3 is 0 or 0x81-0xfe
|
||||
|
||||
const decode = (arr, start, end, stream) => {
|
||||
const o16 = new Uint16Array(end - start + (g1 ? 3 : 0)) // even with pushback it's at most 1 char per byte
|
||||
let oi = 0
|
||||
let i = start
|
||||
const pushback = [] // local and auto-cleared
|
||||
|
||||
// Fast path for 2-byte only
|
||||
// pushback is always empty ad start, and g1 = 0 means g2 = g3 = 0
|
||||
if (g1 === 0) {
|
||||
for (const last1 = end - 1; i < last1; ) {
|
||||
const b = arr[i]
|
||||
if (b < 128) {
|
||||
o16[oi++] = b
|
||||
i++
|
||||
} else if (b === 0x80) {
|
||||
o16[oi++] = 0x20_ac
|
||||
i++
|
||||
} else {
|
||||
if (b === 0xff) break
|
||||
const n = arr[i + 1]
|
||||
let cp
|
||||
if (n < 0x7f) {
|
||||
if (n < 0x40) break
|
||||
cp = gb18030[(b - 0x81) * 190 + n - 0x40]
|
||||
} else {
|
||||
if (n === 0xff || n === 0x7f) break
|
||||
cp = gb18030[(b - 0x81) * 190 + n - 0x41]
|
||||
}
|
||||
|
||||
if (!cp) break
|
||||
o16[oi++] = cp // 16-bit
|
||||
i += 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// First, dump everything until EOF
|
||||
// Same as the full loop, but without EOF handling
|
||||
while (i < end || pushback.length > 0) {
|
||||
const b = pushback.length > 0 ? pushback.pop() : arr[i++]
|
||||
if (g1) {
|
||||
// g2 can be set only when g1 is set, g3 can be set only when g2 is set
|
||||
// hence, 3 checks for g3 is faster than 3 checks for g1
|
||||
if (g2) {
|
||||
if (g3) {
|
||||
if (b <= 0x39 && b >= 0x30) {
|
||||
const p = index(
|
||||
(g1 - 0x81) * 12_600 + (g2 - 0x30) * 1260 + (g3 - 0x81) * 10 + b - 0x30
|
||||
)
|
||||
g1 = g2 = g3 = 0
|
||||
if (p === undefined) {
|
||||
o16[oi++] = err()
|
||||
} else if (p <= 0xff_ff) {
|
||||
o16[oi++] = p // Can validly return replacement
|
||||
} else {
|
||||
const d = p - 0x1_00_00
|
||||
o16[oi++] = 0xd8_00 | (d >> 10)
|
||||
o16[oi++] = 0xdc_00 | (d & 0x3_ff)
|
||||
}
|
||||
} else {
|
||||
pushback.push(b, g3, g2)
|
||||
g1 = g2 = g3 = 0
|
||||
o16[oi++] = err()
|
||||
}
|
||||
} else if (b >= 0x81 && b <= 0xfe) {
|
||||
g3 = b
|
||||
} else {
|
||||
pushback.push(b, g2)
|
||||
g1 = g2 = 0
|
||||
o16[oi++] = err()
|
||||
}
|
||||
} else if (b <= 0x39 && b >= 0x30) {
|
||||
g2 = b
|
||||
} else {
|
||||
let cp
|
||||
if (b >= 0x40 && b <= 0xfe && b !== 0x7f) {
|
||||
cp = gb18030[(g1 - 0x81) * 190 + b - (b < 0x7f ? 0x40 : 0x41)]
|
||||
}
|
||||
|
||||
g1 = 0
|
||||
if (cp) {
|
||||
o16[oi++] = cp // 16-bit
|
||||
} else {
|
||||
o16[oi++] = err()
|
||||
if (b < 128) o16[oi++] = b // can be processed immediately
|
||||
}
|
||||
}
|
||||
} else if (b < 128) {
|
||||
o16[oi++] = b
|
||||
} else if (b === 0x80) {
|
||||
o16[oi++] = 0x20_ac
|
||||
} else if (b === 0xff) {
|
||||
o16[oi++] = err()
|
||||
} else {
|
||||
g1 = b
|
||||
}
|
||||
}
|
||||
|
||||
// if g1 = 0 then g2 = g3 = 0
|
||||
if (g1 && !stream) {
|
||||
g1 = g2 = g3 = 0
|
||||
o16[oi++] = err()
|
||||
}
|
||||
|
||||
return decodeUCS2(o16, oi)
|
||||
}
|
||||
|
||||
return { decode, isAscii: () => g1 === 0 } // if g1 = 0 then g2 = g3 = 0
|
||||
},
|
||||
// https://encoding.spec.whatwg.org/#big5
|
||||
big5: (err) => {
|
||||
// The only decoder which returns multiple codepoints per byte, also has non-charcode codepoints
|
||||
// We store that as strings
|
||||
const big5 = getTable('big5')
|
||||
let lead = 0
|
||||
let oi = 0
|
||||
let o16
|
||||
|
||||
const decodeLead = (b) => {
|
||||
if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) {
|
||||
lead = 0
|
||||
o16[oi++] = err()
|
||||
if (b < 128) o16[oi++] = b
|
||||
} else {
|
||||
const p = big5[(lead - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
|
||||
lead = 0
|
||||
if (p > 0x1_00_00) {
|
||||
o16[oi++] = p >> 16
|
||||
o16[oi++] = p & 0xff_ff
|
||||
} else if (p) {
|
||||
o16[oi++] = p
|
||||
} else {
|
||||
o16[oi++] = err()
|
||||
if (b < 128) o16[oi++] = b
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// eslint-disable-next-line sonarjs/no-identical-functions
|
||||
const decode = (arr, start, end, stream) => {
|
||||
let i = start
|
||||
o16 = new Uint16Array(end - start + (lead ? 1 : 0)) // there are pairs but they consume more than one byte
|
||||
oi = 0
|
||||
|
||||
// Fast path
|
||||
if (!lead) {
|
||||
for (const last1 = end - 1; i < last1; ) {
|
||||
const l = arr[i]
|
||||
if (l < 128) {
|
||||
o16[oi++] = l
|
||||
i++
|
||||
} else {
|
||||
if (l === 0x80 || l === 0xff) break
|
||||
const b = arr[i + 1]
|
||||
if (b < 0x40 || (b > 0x7e && b < 0xa1) || b === 0xff) break
|
||||
const p = big5[(l - 0x81) * 157 + b - (b < 0x7f ? 0x40 : 0x62)]
|
||||
if (p > 0x1_00_00) {
|
||||
o16[oi++] = p >> 16
|
||||
o16[oi++] = p & 0xff_ff
|
||||
} else {
|
||||
if (!p) break
|
||||
o16[oi++] = p
|
||||
}
|
||||
|
||||
i += 2
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (lead && i < end) decodeLead(arr[i++])
|
||||
while (i < end) {
|
||||
const b = arr[i++]
|
||||
if (b < 128) {
|
||||
o16[oi++] = b
|
||||
} else if (b === 0x80 || b === 0xff) {
|
||||
o16[oi++] = err()
|
||||
} else {
|
||||
lead = b
|
||||
if (i < end) decodeLead(arr[i++])
|
||||
}
|
||||
}
|
||||
|
||||
if (lead && !stream) {
|
||||
lead = 0
|
||||
o16[oi++] = err()
|
||||
}
|
||||
|
||||
const res = decodeUCS2(o16, oi)
|
||||
o16 = null
|
||||
return res
|
||||
}
|
||||
|
||||
return { decode, isAscii: () => lead === 0 }
|
||||
},
|
||||
}
|
||||
|
||||
export const isAsciiSuperset = (enc) => enc !== 'iso-2022-jp' // all others are ASCII supersets and can use fast path
|
||||
|
||||
export function multibyteDecoder(enc, loose = false) {
|
||||
if (typeof loose !== 'boolean') throw new TypeError('loose option should be boolean')
|
||||
if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
|
||||
|
||||
// Input is assumed to be typechecked already
|
||||
let mapper
|
||||
const asciiSuperset = isAsciiSuperset(enc)
|
||||
let streaming // because onErr is cached in mapper
|
||||
const onErr = loose
|
||||
? () => 0xff_fd
|
||||
: () => {
|
||||
// The correct way per spec seems to be not destoying the decoder state in stream mode, even when fatal
|
||||
// Decoders big5, euc-jp, euc-kr, shift_jis, gb18030 / gbk - all clear state before throwing unless EOF, so not affected
|
||||
// iso-2022-jp is the only tricky one one where this !stream check matters in non-stream mode
|
||||
if (!streaming) mapper = null // destroy state, effectively the same as 'do not flush' = false, but early
|
||||
throw new TypeError(E_STRICT)
|
||||
}
|
||||
|
||||
return (arr, stream = false) => {
|
||||
let res = ''
|
||||
if (asciiSuperset && (!mapper || mapper.isAscii?.())) {
|
||||
const prefixLen = asciiPrefix(arr)
|
||||
if (prefixLen === arr.length) return decodeAscii(arr) // ascii
|
||||
res = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
|
||||
}
|
||||
|
||||
streaming = stream // affects onErr
|
||||
if (!mapper) mapper = mappers[enc](onErr)
|
||||
return res + mapper.decode(arr, res.length, arr.length, stream)
|
||||
}
|
||||
}
|
||||
|
||||
/* Encoders */
|
||||
|
||||
const maps = new Map()
|
||||
const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore
|
||||
const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore
|
||||
const preencoders = {
|
||||
__proto__: null,
|
||||
big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)),
|
||||
shift_jis: (p) => {
|
||||
const l = (p / 188) | 0
|
||||
const t = p % 188
|
||||
return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
|
||||
},
|
||||
'iso-2022-jp': (p) => ((((p / 94) | 0) + 0x21) << 8) | ((p % 94) + 0x21),
|
||||
'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
|
||||
'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
|
||||
gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
|
||||
}
|
||||
|
||||
preencoders.gbk = preencoders.gb18030
|
||||
|
||||
// We accept that encoders use non-trivial amount of mem, for perf
|
||||
// most are are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
|
||||
function getMap(id, size, ascii) {
|
||||
const cached = maps.get(id)
|
||||
if (cached) return cached
|
||||
let tname = id
|
||||
const sjis = id === 'shift_jis'
|
||||
const iso2022jp = id === 'iso-2022-jp'
|
||||
if (iso2022jp) tname = 'jis0208'
|
||||
if (id === 'gbk') tname = 'gb18030'
|
||||
if (id === 'euc-jp' || sjis) tname = 'jis0208'
|
||||
const table = getTable(tname)
|
||||
const map = new Uint16Array(size)
|
||||
const enc = preencoders[id] || ((p) => p + 1)
|
||||
for (let i = 0; i < table.length; i++) {
|
||||
const c = table[i]
|
||||
if (!c) continue
|
||||
if (id === 'big5') {
|
||||
if (i < 5024) continue // this also skips multi-codepoint strings
|
||||
// In big5, all return first entries except for these
|
||||
if (
|
||||
map[c] &&
|
||||
c !== 0x25_50 &&
|
||||
c !== 0x25_5e &&
|
||||
c !== 0x25_61 &&
|
||||
c !== 0x25_6a &&
|
||||
c !== 0x53_41 &&
|
||||
c !== 0x53_45
|
||||
) {
|
||||
continue
|
||||
}
|
||||
} else {
|
||||
if (sjis && i >= 8272 && i <= 8835) continue
|
||||
if (map[c]) continue
|
||||
}
|
||||
|
||||
if (c > 0xff_ff) {
|
||||
// always a single codepoint here
|
||||
const s = String.fromCharCode(c >> 16, c & 0xff_ff)
|
||||
map[s.codePointAt(0)] = enc(i)
|
||||
} else {
|
||||
map[c] = enc(i)
|
||||
}
|
||||
}
|
||||
|
||||
if (ascii) for (let i = 0; i < 0x80; i++) map[i] = i
|
||||
if (sjis || id === 'euc-jp') {
|
||||
if (sjis) map[0x80] = 0x80
|
||||
const d = sjis ? 0xfe_c0 : 0x70_c0
|
||||
for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d
|
||||
map[0x22_12] = map[0xff_0d]
|
||||
map[0xa5] = 0x5c
|
||||
map[0x20_3e] = 0x7e
|
||||
} else if (tname === 'gb18030') {
|
||||
if (id === 'gbk') map[0x20_ac] = 0x80
|
||||
for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4
|
||||
for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b
|
||||
for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b
|
||||
}
|
||||
|
||||
maps.set(id, map)
|
||||
return map
|
||||
}
|
||||
|
||||
const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
|
||||
let gb18030r, katakana
|
||||
|
||||
export function multibyteEncoder(enc, onError) {
|
||||
if (!Object.hasOwn(mappers, enc)) throw new RangeError('Unsupported encoding')
|
||||
const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
|
||||
const iso2022jp = enc === 'iso-2022-jp'
|
||||
const gb18030 = enc === 'gb18030'
|
||||
const ascii = isAsciiSuperset(enc)
|
||||
const width = iso2022jp ? 5 : gb18030 ? 4 : 2
|
||||
const tailsize = iso2022jp ? 3 : 0
|
||||
const map = getMap(enc, size, ascii)
|
||||
if (gb18030 && !gb18030r) gb18030r = getTable('gb18030-ranges')
|
||||
if (iso2022jp && !katakana) katakana = getTable('iso-2022-jp-katakana')
|
||||
return (str) => {
|
||||
if (typeof str !== 'string') throw new TypeError(E_STRING)
|
||||
if (ascii && nativeEncoder && !NON_LATIN.test(str)) {
|
||||
const u8 = nativeEncoder.encode(str)
|
||||
if (u8.length === str.length) return u8
|
||||
}
|
||||
|
||||
const length = str.length
|
||||
const u8 = new Uint8Array(length * width + tailsize)
|
||||
let i = 0
|
||||
|
||||
if (ascii) {
|
||||
while (i < length) {
|
||||
const x = str.charCodeAt(i)
|
||||
if (x >= 128) break
|
||||
u8[i++] = x
|
||||
}
|
||||
}
|
||||
|
||||
// eslint-disable-next-line unicorn/consistent-function-scoping
|
||||
const err = (code) => {
|
||||
if (onError) return onError(code, u8, i)
|
||||
throw new TypeError(E_STRICT)
|
||||
}
|
||||
|
||||
if (!map || map.length < size) /* c8 ignore next */ throw new Error('Unreachable') // Important for perf
|
||||
|
||||
if (iso2022jp) {
|
||||
let state = 0 // 0 = ASCII, 1 = Roman, 2 = jis0208
|
||||
const restore = () => {
|
||||
state = 0
|
||||
u8[i++] = 0x1b
|
||||
u8[i++] = 0x28
|
||||
u8[i++] = 0x42
|
||||
}
|
||||
|
||||
for (let j = 0; j < length; j++) {
|
||||
let x = str.charCodeAt(j)
|
||||
if (x >= 0xd8_00 && x < 0xe0_00) {
|
||||
if (state === 2) restore()
|
||||
if (x >= 0xdc_00 || j + 1 === length) {
|
||||
i += err(x) // lone
|
||||
} else {
|
||||
const x1 = str.charCodeAt(j + 1)
|
||||
if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
|
||||
i += err(x) // lone
|
||||
} else {
|
||||
j++ // consume x1
|
||||
i += err(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
|
||||
}
|
||||
}
|
||||
} else if (x < 0x80) {
|
||||
if (state === 2 || (state === 1 && (x === 0x5c || x === 0x7e))) restore()
|
||||
if (x === 0xe || x === 0xf || x === 0x1b) {
|
||||
i += err(0xff_fd) // 12.2.2. step 3: This returns U+FFFD rather than codePoint to prevent attacks
|
||||
} else {
|
||||
u8[i++] = x
|
||||
}
|
||||
} else if (x === 0xa5 || x === 0x20_3e) {
|
||||
if (state !== 1) {
|
||||
state = 1
|
||||
u8[i++] = 0x1b
|
||||
u8[i++] = 0x28
|
||||
u8[i++] = 0x4a
|
||||
}
|
||||
|
||||
u8[i++] = x === 0xa5 ? 0x5c : 0x7e
|
||||
} else {
|
||||
if (x === 0x22_12) x = 0xff_0d
|
||||
if (x >= 0xff_61 && x <= 0xff_9f) x = katakana[x - 0xff_61]
|
||||
const e = map[x]
|
||||
if (e) {
|
||||
if (state !== 2) {
|
||||
state = 2
|
||||
u8[i++] = 0x1b
|
||||
u8[i++] = 0x24
|
||||
u8[i++] = 0x42
|
||||
}
|
||||
|
||||
u8[i++] = e >> 8
|
||||
u8[i++] = e & 0xff
|
||||
} else {
|
||||
if (state === 2) restore()
|
||||
i += err(x)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (state) restore()
|
||||
} else if (gb18030) {
|
||||
// Deduping this branch hurts other encoders perf
|
||||
const encode = (cp) => {
|
||||
let a = 0, b = 0 // prettier-ignore
|
||||
for (const [c, d] of gb18030r) {
|
||||
if (d > cp) break
|
||||
a = c
|
||||
b = d
|
||||
}
|
||||
|
||||
let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
|
||||
u8[i++] = 0x81 + ((rp / 12_600) | 0)
|
||||
rp %= 12_600
|
||||
u8[i++] = 0x30 + ((rp / 1260) | 0)
|
||||
rp %= 1260
|
||||
u8[i++] = 0x81 + ((rp / 10) | 0)
|
||||
u8[i++] = 0x30 + (rp % 10)
|
||||
}
|
||||
|
||||
for (let j = i; j < length; j++) {
|
||||
const x = str.charCodeAt(j)
|
||||
if (x >= 0xd8_00 && x < 0xe0_00) {
|
||||
if (x >= 0xdc_00 || j + 1 === length) {
|
||||
i += err(x) // lone
|
||||
} else {
|
||||
const x1 = str.charCodeAt(j + 1)
|
||||
if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
|
||||
i += err(x) // lone
|
||||
} else {
|
||||
j++ // consume x1
|
||||
encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const e = map[x]
|
||||
if (e & 0xff_00) {
|
||||
u8[i++] = e >> 8
|
||||
u8[i++] = e & 0xff
|
||||
} else if (e || x === 0) {
|
||||
u8[i++] = e
|
||||
} else if (x === 0xe5_e5) {
|
||||
i += err(x)
|
||||
} else {
|
||||
encode(x)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const long =
|
||||
enc === 'big5'
|
||||
? (x) => {
|
||||
const e = map[x]
|
||||
if (e & 0xff_00) {
|
||||
u8[i++] = e >> 8
|
||||
u8[i++] = e & 0xff
|
||||
} else if (e || x === 0) {
|
||||
u8[i++] = e
|
||||
} else {
|
||||
i += err(x)
|
||||
}
|
||||
}
|
||||
: (x) => {
|
||||
i += err(x)
|
||||
}
|
||||
|
||||
for (let j = i; j < length; j++) {
|
||||
const x = str.charCodeAt(j)
|
||||
if (x >= 0xd8_00 && x < 0xe0_00) {
|
||||
if (x >= 0xdc_00 || j + 1 === length) {
|
||||
i += err(x) // lone
|
||||
} else {
|
||||
const x1 = str.charCodeAt(j + 1)
|
||||
if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
|
||||
i += err(x) // lone
|
||||
} else {
|
||||
j++ // consume x1
|
||||
long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
const e = map[x]
|
||||
if (e & 0xff_00) {
|
||||
u8[i++] = e >> 8
|
||||
u8[i++] = e & 0xff
|
||||
} else if (e || x === 0) {
|
||||
u8[i++] = e
|
||||
} else {
|
||||
i += err(x)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return i === u8.length ? u8 : u8.slice(0, i)
|
||||
}
|
||||
}
|
||||
118
node_modules/@exodus/bytes/fallback/multi-byte.table.js
generated
vendored
Normal file
118
node_modules/@exodus/bytes/fallback/multi-byte.table.js
generated
vendored
Normal file
@@ -0,0 +1,118 @@
|
||||
import { fromBase64url } from '@exodus/bytes/base64.js'
|
||||
import { utf16toString } from '@exodus/bytes/utf16.js'
|
||||
import loadEncodings from './multi-byte.encodings.cjs'
|
||||
|
||||
export const sizes = {
|
||||
jis0208: 11_104,
|
||||
jis0212: 7211,
|
||||
'euc-kr': 23_750,
|
||||
gb18030: 23_940,
|
||||
big5: 19_782,
|
||||
}
|
||||
|
||||
// This is huge. It's _much_ smaller than https://npmjs.com/text-encoding though
|
||||
// Exactly as mapped by the index table
|
||||
// 0,x - hole of x empty elements
|
||||
// n,c - continious [c, ...] of length n
|
||||
// $.. - references to common chunks
|
||||
// -{x} - same as 1,{x}
|
||||
|
||||
// See tests/multi-byte.test.js to verify that this data decodes exactly into the encoding spec tables
|
||||
|
||||
let indices
|
||||
const tables = new Map()
|
||||
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
||||
|
||||
function loadBase64(str) {
|
||||
const x = fromBase64url(str)
|
||||
const len = x.length
|
||||
const len2 = len >> 1
|
||||
const y = new Uint8Array(len)
|
||||
let a = -1, b = 0 // prettier-ignore
|
||||
for (let i = 0, j = 0; i < len; i += 2, j++) {
|
||||
a = (a + x[j] + 1) & 0xff
|
||||
b = (b + x[len2 + j]) & 0xff
|
||||
y[i] = a
|
||||
y[i + 1] = b
|
||||
}
|
||||
|
||||
return y
|
||||
}
|
||||
|
||||
function unwrap(res, t, pos) {
|
||||
let code = 0
|
||||
for (let i = 0; i < t.length; i++) {
|
||||
let x = t[i]
|
||||
if (typeof x === 'number') {
|
||||
if (x === 0) {
|
||||
pos += t[++i]
|
||||
} else {
|
||||
if (x < 0) {
|
||||
code -= x
|
||||
x = 1
|
||||
} else {
|
||||
code += t[++i]
|
||||
}
|
||||
|
||||
for (let k = 0; k < x; k++, pos++, code++) {
|
||||
if (code <= 0xff_ff) {
|
||||
res[pos] = code
|
||||
} else {
|
||||
const c = String.fromCodePoint(code)
|
||||
res[pos] = (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
||||
}
|
||||
}
|
||||
}
|
||||
} else if (x[0] === '$' && Object.hasOwn(indices, x)) {
|
||||
pos = unwrap(res, indices[x], pos) // self-reference using shared chunks
|
||||
} else {
|
||||
let last
|
||||
// splits by codepoints
|
||||
for (const c of utf16toString(loadBase64(x), 'uint8-le')) {
|
||||
last = c
|
||||
res[pos++] = c.length === 1 ? c.charCodeAt(0) : (c.charCodeAt(0) << 16) | c.charCodeAt(1)
|
||||
}
|
||||
|
||||
code = last.codePointAt(0) + 1
|
||||
}
|
||||
}
|
||||
|
||||
return pos
|
||||
}
|
||||
|
||||
export function getTable(id) {
|
||||
const cached = tables.get(id)
|
||||
if (cached) return cached
|
||||
|
||||
if (!indices) indices = loadEncodings() // lazy-load
|
||||
if (!Object.hasOwn(indices, id)) throw new Error('Unknown encoding')
|
||||
if (!indices[id]) throw new Error('Table already used (likely incorrect bundler dedupe)')
|
||||
|
||||
let res
|
||||
if (id.endsWith('-ranges')) {
|
||||
res = []
|
||||
let a = 0, b = 0 // prettier-ignore
|
||||
const idx = indices[id]
|
||||
while (idx.length > 0) res.push([(a += idx.shift()), (b += idx.shift())]) // destroying, we remove it later anyway
|
||||
} else if (id.endsWith('-katakana')) {
|
||||
let a = -1
|
||||
res = new Uint16Array(indices[id].map((x) => (a += x + 1)))
|
||||
} else if (id === 'big5') {
|
||||
res = new Uint32Array(sizes[id]) // single or double charcodes
|
||||
unwrap(res, indices[id], 0)
|
||||
// Pointer code updates are embedded into the table
|
||||
// These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
|
||||
res[1133] = 0xca_03_04
|
||||
res[1135] = 0xca_03_0c
|
||||
res[1164] = 0xea_03_04
|
||||
res[1166] = 0xea_03_0c
|
||||
} else {
|
||||
if (!Object.hasOwn(sizes, id)) throw new Error('Unknown encoding')
|
||||
res = new Uint16Array(sizes[id])
|
||||
unwrap(res, indices[id], 0)
|
||||
}
|
||||
|
||||
indices[id] = null // gc
|
||||
tables.set(id, res)
|
||||
return res
|
||||
}
|
||||
31
node_modules/@exodus/bytes/fallback/percent.js
generated
vendored
Normal file
31
node_modules/@exodus/bytes/fallback/percent.js
generated
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
import { decodeAscii, encodeLatin1 } from './latin1.js'
|
||||
import { decode2string } from './platform.js'
|
||||
|
||||
const ERR = 'percentEncodeSet must be a string of unique increasing codepoints in range 0x20 - 0x7e'
|
||||
const percentMap = new Map()
|
||||
let hex, base
|
||||
|
||||
export function percentEncoder(set, spaceAsPlus = false) {
|
||||
if (typeof set !== 'string' || /[^\x20-\x7E]/.test(set)) throw new TypeError(ERR)
|
||||
if (typeof spaceAsPlus !== 'boolean') throw new TypeError('spaceAsPlus must be boolean')
|
||||
const id = set + +spaceAsPlus
|
||||
const cached = percentMap.get(id)
|
||||
if (cached) return cached
|
||||
|
||||
const n = encodeLatin1(set).sort() // string checked above to be ascii
|
||||
if (decodeAscii(n) !== set || new Set(n).size !== n.length) throw new TypeError(ERR)
|
||||
|
||||
if (!base) {
|
||||
hex = Array.from({ length: 256 }, (_, i) => `%${i.toString(16).padStart(2, '0').toUpperCase()}`)
|
||||
base = hex.map((h, i) => (i < 0x20 || i > 0x7e ? h : String.fromCharCode(i)))
|
||||
}
|
||||
|
||||
const map = base.slice() // copy
|
||||
for (const c of n) map[c] = hex[c]
|
||||
if (spaceAsPlus) map[0x20] = '+' // overrides whatever percentEncodeSet thinks about it
|
||||
|
||||
// Input is not typechecked, for internal use only
|
||||
const percentEncode = (u8, start = 0, end = u8.length) => decode2string(u8, start, end, map)
|
||||
percentMap.set(id, percentEncode)
|
||||
return percentEncode
|
||||
}
|
||||
31
node_modules/@exodus/bytes/fallback/platform.browser.js
generated
vendored
Normal file
31
node_modules/@exodus/bytes/fallback/platform.browser.js
generated
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
import { decodePartAddition as decodePart } from './platform.native.js'
|
||||
|
||||
export { isLE, encodeCharcodesPure as encodeCharcodes } from './platform.native.js'
|
||||
|
||||
export const nativeBuffer = null
|
||||
export const isHermes = false
|
||||
export const isDeno = false
|
||||
export const nativeEncoder = /* @__PURE__ */ (() => new TextEncoder())()
|
||||
export const nativeDecoder = /* @__PURE__ */ (() => new TextDecoder('utf-8', { ignoreBOM: true }))()
|
||||
export const nativeDecoderLatin1 = /* @__PURE__ */ (() =>
|
||||
new TextDecoder('latin1', { ignoreBOM: true }))()
|
||||
|
||||
export function decode2string(arr, start, end, m) {
|
||||
if (end - start > 30_000) {
|
||||
// Limit concatenation to avoid excessive GC
|
||||
// Thresholds checked on Hermes for toHex
|
||||
const concat = []
|
||||
for (let i = start; i < end; ) {
|
||||
const step = i + 500
|
||||
const iNext = step > end ? end : step
|
||||
concat.push(decodePart(arr, i, iNext, m))
|
||||
i = iNext
|
||||
}
|
||||
|
||||
const res = concat.join('')
|
||||
concat.length = 0
|
||||
return res
|
||||
}
|
||||
|
||||
return decodePart(arr, start, end, m)
|
||||
}
|
||||
2
node_modules/@exodus/bytes/fallback/platform.js
generated
vendored
Normal file
2
node_modules/@exodus/bytes/fallback/platform.js
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
// platform.native actually hosts Node.js / Deno detection too
|
||||
export * from './platform.native.js'
|
||||
122
node_modules/@exodus/bytes/fallback/platform.native.js
generated
vendored
Normal file
122
node_modules/@exodus/bytes/fallback/platform.native.js
generated
vendored
Normal file
@@ -0,0 +1,122 @@
|
||||
const { Buffer } = globalThis
|
||||
const haveNativeBuffer = Buffer && !Buffer.TYPED_ARRAY_SUPPORT
|
||||
export const nativeBuffer = haveNativeBuffer ? Buffer : null
|
||||
export const isHermes = /* @__PURE__ */ (() => !!globalThis.HermesInternal)()
|
||||
export const isDeno = /* @__PURE__ */ (() => !!globalThis.Deno)()
|
||||
export const isLE = /* @__PURE__ */ (() => new Uint8Array(Uint16Array.of(258).buffer)[0] === 2)()
|
||||
|
||||
// We consider Node.js TextDecoder/TextEncoder native
|
||||
// Still needed in platform.native.js as this is re-exported to platform.js
|
||||
let isNative = (x) => x && (haveNativeBuffer || `${x}`.includes('[native code]'))
|
||||
if (!haveNativeBuffer && isNative(() => {})) isNative = () => false // e.g. XS, we don't want false positives
|
||||
|
||||
export const nativeEncoder = /* @__PURE__ */ (() =>
|
||||
isNative(globalThis.TextEncoder) ? new TextEncoder() : null)()
|
||||
export const nativeDecoder = /* @__PURE__ */ (() =>
|
||||
isNative(globalThis.TextDecoder) ? new TextDecoder('utf-8', { ignoreBOM: true }) : null)()
|
||||
|
||||
// Actually windows-1252, compatible with ascii and latin1 decoding
|
||||
// Beware that on non-latin1, i.e. on windows-1252, this is broken in ~all Node.js versions released
|
||||
// in 2025 due to a regression, so we call it Latin1 as it's usable only for that
|
||||
export const nativeDecoderLatin1 = /* @__PURE__ */ (() => {
|
||||
// Not all barebone engines with TextDecoder support something except utf-8, detect
|
||||
if (nativeDecoder) {
|
||||
try {
|
||||
return new TextDecoder('latin1', { ignoreBOM: true })
|
||||
} catch {}
|
||||
}
|
||||
|
||||
return null
|
||||
})()
|
||||
|
||||
export function decodePartAddition(a, start, end, m) {
|
||||
let o = ''
|
||||
let i = start
|
||||
for (const last3 = end - 3; i < last3; i += 4) {
|
||||
const x0 = a[i]
|
||||
const x1 = a[i + 1]
|
||||
const x2 = a[i + 2]
|
||||
const x3 = a[i + 3]
|
||||
o += m[x0]
|
||||
o += m[x1]
|
||||
o += m[x2]
|
||||
o += m[x3]
|
||||
}
|
||||
|
||||
while (i < end) o += m[a[i++]]
|
||||
return o
|
||||
}
|
||||
|
||||
// Decoding with templates is faster on Hermes
|
||||
export function decodePartTemplates(a, start, end, m) {
|
||||
let o = ''
|
||||
let i = start
|
||||
for (const last15 = end - 15; i < last15; i += 16) {
|
||||
const x0 = a[i]
|
||||
const x1 = a[i + 1]
|
||||
const x2 = a[i + 2]
|
||||
const x3 = a[i + 3]
|
||||
const x4 = a[i + 4]
|
||||
const x5 = a[i + 5]
|
||||
const x6 = a[i + 6]
|
||||
const x7 = a[i + 7]
|
||||
const x8 = a[i + 8]
|
||||
const x9 = a[i + 9]
|
||||
const x10 = a[i + 10]
|
||||
const x11 = a[i + 11]
|
||||
const x12 = a[i + 12]
|
||||
const x13 = a[i + 13]
|
||||
const x14 = a[i + 14]
|
||||
const x15 = a[i + 15]
|
||||
o += `${m[x0]}${m[x1]}${m[x2]}${m[x3]}${m[x4]}${m[x5]}${m[x6]}${m[x7]}${m[x8]}${m[x9]}${m[x10]}${m[x11]}${m[x12]}${m[x13]}${m[x14]}${m[x15]}`
|
||||
}
|
||||
|
||||
while (i < end) o += m[a[i++]]
|
||||
return o
|
||||
}
|
||||
|
||||
const decodePart = isHermes ? decodePartTemplates : decodePartAddition
|
||||
export function decode2string(arr, start, end, m) {
|
||||
if (end - start > 30_000) {
|
||||
// Limit concatenation to avoid excessive GC
|
||||
// Thresholds checked on Hermes for toHex
|
||||
const concat = []
|
||||
for (let i = start; i < end; ) {
|
||||
const step = i + 500
|
||||
const iNext = step > end ? end : step
|
||||
concat.push(decodePart(arr, i, iNext, m))
|
||||
i = iNext
|
||||
}
|
||||
|
||||
const res = concat.join('')
|
||||
concat.length = 0
|
||||
return res
|
||||
}
|
||||
|
||||
return decodePart(arr, start, end, m)
|
||||
}
|
||||
|
||||
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
||||
|
||||
function encodeCharcodesHermes(str, arr) {
|
||||
const length = str.length
|
||||
if (length > 64) {
|
||||
const at = str.charCodeAt.bind(str) // faster on strings from ~64 chars on Hermes, but can be 10x slower on e.g. JSC
|
||||
for (let i = 0; i < length; i++) arr[i] = at(i)
|
||||
} else {
|
||||
for (let i = 0; i < length; i++) arr[i] = str.charCodeAt(i)
|
||||
}
|
||||
|
||||
return arr
|
||||
}
|
||||
|
||||
// Fills arr with the char codes of str (arr[i] = str.charCodeAt(i)) and returns arr.
// Plain loop only — could be unrolled, but this path is not used on Hermes atm.
export function encodeCharcodesPure(str, arr) {
  let i = 0
  const n = str.length
  while (i < n) {
    arr[i] = str.charCodeAt(i)
    i += 1
  }

  return arr
}
|
||||
|
||||
/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
|
||||
|
||||
// Engine-specific charcode filler: the bound-charCodeAt variant only pays off on Hermes
export const encodeCharcodes = isHermes ? encodeCharcodesHermes : encodeCharcodesPure
|
||||
73
node_modules/@exodus/bytes/fallback/single-byte.encodings.js
generated
vendored
Normal file
73
node_modules/@exodus/bytes/fallback/single-byte.encodings.js
generated
vendored
Normal file
@@ -0,0 +1,73 @@
|
||||
// See tests/encoding/fixtures/single-byte/dump.js for generator
//
// NOTE(review): generated data — do not edit by hand. The tables appear
// delta-encoded: consumers (see getEncoding in single-byte.js) expand
// [len] / [len, delta] entries into runs and then map entry x at index i to
// code point x + 128 + i, while entries equal to `r` decode to U+FFFD.

// Replacement marker (U+FFFD)
const r = 0xff_fd

/* eslint-disable unicorn/numeric-separators-style, @exodus/export-default/named */

// Common ranges (fragments shared by several encodings below)

// prettier-ignore
const i2 = [189,148,0,0,63,0,116,64,0,68,0,78,0,78,0,0,63,64,114,117,0,0,123,0,0,128,149,0,149,0,0,132,0,117,0,0,32,0,85,33,0,37,0,47,0,47,0,0,32,33,83,86,0,0,92,0,0,97,118,0,118,0,0,101,474]
// prettier-ignore
const iB = [[58,3424],[4,r],[29,3424],[4,r]]
const i9 = [[47], 78, [12], 83, 128, [17], 47, [12], 52, 97]
const w1 = [8236, 0, 8088, 0, 8090, 8097, 8090, 8090, 0, 8103]
const w2 = [8236, 0, 8088, 271, 8090, 8097, 8090, 8090, 574, 8103]
// prettier-ignore
const w7 = [64,0,157,[4],39,68,109,62,67,0,0,82,75,68,0,175,75,86,105,92,108,144,114,115,0,120,[3],154,104,128,143,0,158,159,0,37,78,31,36,0,0,51,44,37,0,144,44,55,74,61,77,113,83,84,0,89,[3],123,73,97,112,0,127,128]
const w8 = [8071, 8071, 8073, 8073, 8077, 8061, 8061]
// prettier-ignore
const k8b = [-22,910,879,879,899,880,880,894,876,893,[8,879],894,[4,878],864,859,884,882,861,877,881,876,873,875,846,815,815,835,816,816,830,812,829,[8,815],830,[4,814],800,795,820,818,797,813,817,812,809,811]
// prettier-ignore
const k8a = [9344,9345,9354,9357,9360,9363,9366,9373,9380,9387,9394,9461,9464,9467,9470,[4,9473],8845,9484,8580,8580,8625,8652,8652,6,8838,20,21,25,88,[3,9392],942]

// Explicitly listed encodings; windows-125x and iso-8859-x families are filled in below
// prettier-ignore
const maps = {
  ibm866: [[48,912],[3,9441],...[29,62,122,122,109,107,120,101,106,111,109,107,31,34,65,56,39,10,69,102,102,96,89,109,105,98,81,108,102,102,97,97,84,82,75,75,98,96,13,0,123,118,125,128,111].map(x=>x+9266),[16,864],785,864,786,865,787,866,792,871,-72,8480,-67,8479,8218,-89,9378,-95],
  'koi8-u': [...k8a,944,9391,944,944,[5,9391],996,944,[4,9391],846,848,9390,848,848,[5,9390],979,848,...k8b],
  'koi8-r': [...k8a,[15,9391],846,[11,9390],...k8b],
  macintosh: [68,68,69,70,77,81,86,90,88,89,90,88,89,90,91,89,90,90,91,89,90,90,91,92,90,91,92,90,94,92,93,93,8064,15,0,0,3,8061,16,56,6,0,8312,9,-4,8627,24,41,8558,0,8626,8626,-15,0,8524,8538,8535,775,8561,-17,-2,748,40,57,-1,-32,-22,8535,206,8579,8512,-28,-13,8029,-42,-11,-9,8,132,132,8003,8003,8010,8010,8004,8004,33,9459,39,159,8042,8145,8029,8029,64035,64035,8001,-42,7992,7995,8012,-35,-28,-38,-29,-33,[3,-29],-33,-27,-27,63503,-31,-24,-24,-27,60,464,485,-73,[3,479],-68,480,477,456],
  'x-mac-cyrillic': [[32,912],8064,15,1006,0,3,8061,16,863,6,0,8312,855,934,8627,853,932,8558,0,8626,8626,930,0,987,849,844,923,845,924,845,924,844,923,920,836,-22,8535,206,8579,8512,-28,-13,8029,-42,832,911,831,910,902,8003,8003,8010,8010,8004,8004,33,8007,822,901,821,900,8250,804,883,880,[31,848],8109],
  'windows-874': [8236,[4],8097,[11],...w8,[9],...iB],
}

// windows-1250 - windows-1258
// prettier-ignore
;[
  [...w1,214,8110,206,215,239,234,0,...w8,0,8329,199,8095,191,200,224,219,0,550,566,158,0,95,[4],180,[4],204,0,0,553,143,[5],76,165,0,129,544,128,...i2],
  [898,898,8088,976,8090,8097,8090,8090,8228,8103,895,8110,894,895,893,896,962,...w8,0,8329,959,8095,958,959,957,960,0,877,956,869,0,1003,0,0,857,0,858,[4],856,0,0,852,931,989,[3],921,8285,922,0,924,840,919,920,[64,848]],
  [...w2,214,8110,198,0,239,0,0,...w8,580,8329,199,8095,183,0,224,217],
  [8236,0,8088,271,8090,8097,8090,8090,0,8103,0,8110,[5],...w8,0,8329,0,8095,[5],740,740,[7],r,[4],8038,[4],720,[3],[3,720],0,720,0,[20,720],r,[44,720],r],
  [...w2,214,8110,198,[4],...w8,580,8329,199,8095,183,0,0,217,0,...i9],
  [...w2,0,8110,[5],...w8,580,8329,0,8095,[8],8198,[5],45,[15],61,[5],[20,1264],[5,1308],[7,r],[27,1264],r,r,7953,7953,r],
  [8236,1533,8088,271,8090,8097,8090,8090,574,8103,1519,8110,198,1529,1546,1529,1567,...w8,1553,8329,1527,8095,183,8047,8047,1563,0,1387,[8],1556,[15],1377,[4],1376,1537,[22,1376],0,[4,1375],[4,1380],0,1379,0,[4,1378],[5],1373,1373,0,0,[4,1371],0,1370,1370,0,1369,0,1368,0,0,7953,7953,1491],
  [...w1,0,8110,0,27,569,41,0,...w8,0,8329,0,8095,0,18,573,0,0,r,[3],r,0,0,48,0,172,[4],23,[8],...w7,474],
  [...w2,0,8110,198,[4],...w8,580,8329,0,8095,183,0,0,217,[35],63,[8],564,[3],64,0,567,0,0,203,[7],210,549,[4],32,[8],533,[3],33,0,561,0,0,172,[7],179,8109],
].forEach((m, i) => {
  maps[`windows-${i + 1250}`] = m
});

// iso-8859-1 - iso-8859-16
// prettier-ignore
;[
  [], // Actual Latin1 / Unicode subset, non-WHATWG, which maps iso-8859-1 to windows-1252
  [99,566,158,0,152,180,0,0,183,180,185,205,0,207,204,0,84,553,143,0,137,165,528,0,168,165,170,190,544,192,...i2],
  [133,566,0,0,r,126,0,0,135,180,115,136,0,r,204,0,118,[4],111,0,0,120,165,100,121,0,r,189,[3],r,0,69,66,[9],r,[4],75,0,0,68,[4],143,126,[4],r,0,38,35,[9],r,[4],44,0,0,37,[4],112,95,474],
  [99,150,179,0,131,149,0,0,183,104,119,186,0,207,0,0,84,553,164,0,116,134,528,0,168,89,104,171,141,192,140,64,[6],103,68,0,78,0,74,0,0,91,64,116,122,99,[5],153,[3],139,140,0,33,[6],72,37,0,47,0,43,0,0,60,33,85,91,68,[5],122,[3],108,109,474],
  [[12,864],0,[66,864],8230,[12,864],-86,864,864],
  [[3,r],0,[7,r],1376,0,[13,r],1376,[3,r],1376,r,[26,1376],[5,r],[19,1376],[13,r]],
  [8055,8055,0,8200,8202,[4],720,[3],r,8038,[4],[3,720],0,[3,720],0,720,0,[20,720],r,[44,720],r],
  [r,[8],45,[15],61,[4],[32,r],7992,[27,1264],r,r,7953,7953,r],
  i9, // non-WHATWG, which maps iso-8859-9 to windows-1254
  [99,112,127,134,131,144,0,147,103,182,187,209,0,188,155,0,84,97,112,119,116,129,0,132,88,167,172,194,8024,173,140,64,[6],103,68,0,78,0,74,[4],116,122,[4],145,0,153,[6],33,[6],72,37,0,47,0,43,[4],85,91,[4],114,0,122,[5],57],
  iB, // non-WHATWG, which maps iso-8859-11 to windows-874
  null, // no 12
  [8060,[3],8057,0,0,48,0,172,[4],23,[4],8040,[3],...w7,7962],
  [7521,7521,0,102,102,7524,0,7640,0,7640,7520,7750,0,0,201,7534,7534,110,110,7564,7564,0,7583,7625,7582,7625,7589,7735,7623,7623,7586,[16],164,[6],7571,[6],152,[17],133,[6],7540,[6],121],
  [[3],8200,0,186,0,185,[11],201,[3],198,[3],150,150,186],
  [99,99,158,8200,8057,186,0,185,0,366,0,205,0,204,204,0,0,90,143,201,8040,0,0,198,84,351,0,150,150,186,189,[3],63,0,65,[10],64,114,[3],123,0,131,152,[4],59,316,[4],32,0,34,[10],33,83,[3],92,0,100,121,[4],28,285],
].forEach((m, i) => {
  if (m) maps[`iso-8859-${i + 1}`] = [[33], ...m]
});

export default maps
|
||||
110
node_modules/@exodus/bytes/fallback/single-byte.js
generated
vendored
Normal file
110
node_modules/@exodus/bytes/fallback/single-byte.js
generated
vendored
Normal file
@@ -0,0 +1,110 @@
|
||||
import { asciiPrefix, decodeAscii, decodeLatin1 } from './latin1.js'
|
||||
import encodings from './single-byte.encodings.js'
|
||||
import { decode2string, nativeDecoder } from './platform.js'
|
||||
|
||||
// Error message for bytes that are invalid in the selected single-byte encoding
export const E_STRICT = 'Input is not well-formed for this encoding'
// Encoding labels that are supported but handled specially (absent from the generated table)
const xUserDefined = 'x-user-defined'
const iso8i = 'iso-8859-8-i'
|
||||
|
||||
// Throws a RangeError unless `encoding` is a label this module can handle:
// a key of the generated `encodings` table or one of the two special-cased aliases.
export const assertEncoding = (encoding) => {
  const known =
    encoding === xUserDefined || encoding === iso8i || Object.hasOwn(encodings, encoding)
  if (!known) throw new RangeError('Unsupported encoding')
}
|
||||
|
||||
// U+FFFD replacement marker, mirrors `r` in single-byte.encodings.js
const r = 0xff_fd
|
||||
|
||||
// Expands the compressed table for `encoding` into an array of code points for
// bytes 0x80-0xFF. Entries equal to `r` mark bytes that decode to U+FFFD.
// [len] / [len, delta] table entries are run-length expanded first; every other
// entry is a delta turned into an absolute code point as delta + 128 + index.
export function getEncoding(encoding) {
  assertEncoding(encoding)
  if (encoding === xUserDefined) {
    // x-user-defined maps byte 0x80+i straight into the U+F780 private-use range
    return Array.from({ length: 128 }, (_, i) => 0xf7_80 + i)
  }

  const table = encodings[encoding === iso8i ? 'iso-8859-8' : encoding]
  const deltas = table.flatMap((entry) =>
    Array.isArray(entry) ? new Array(entry[0]).fill(entry[1] ?? 0) : entry
  )
  return deltas.map((delta, i) => (delta === r ? r : delta + 128 + i))
}
|
||||
|
||||
// Per-encoding caches, keyed by encoding label
const mappers = new Map()
const decoders = new Map()
const encmaps = new Map()
|
||||
|
||||
// Used only on Node.js, no reason to optimize for anything else
// E.g. avoiding .from and filling zero-initialized arr manually is faster on Hermes, but we avoid this codepath on Hermes completely

// Returns a cached { mapper, incomplete } pair for a single-byte encoding.
// mapper(arr, start) copies arr[start..] into a Uint16Array and rewrites each
// byte through a lazily-built 256-entry byte -> code point table.
// `incomplete` is true when some bytes have no mapping (table contains `r`).
export function encodingMapper(encoding) {
  const cached = mappers.get(encoding)
  if (cached) return cached

  const codes = getEncoding(encoding)
  const incomplete = codes.includes(r)
  let map
  const mapper = (arr, start = 0) => {
    if (!map) {
      // Identity for 0-127 (ASCII), table-driven for 128-255
      map = new Uint16Array(256).map((_, i) => i) // Unicode subset
      map.set(Uint16Array.from(codes), 128)
    }

    const o = Uint16Array.from(start === 0 ? arr : arr.subarray(start)) // copy to modify in-place, also those are 16-bit now
    let i = 0
    // 8-wide unrolled loop, then a scalar tail loop for the remainder
    for (const end7 = o.length - 7; i < end7; i += 8) {
      o[i] = map[o[i]]
      o[i + 1] = map[o[i + 1]]
      o[i + 2] = map[o[i + 2]]
      o[i + 3] = map[o[i + 3]]
      o[i + 4] = map[o[i + 4]]
      o[i + 5] = map[o[i + 5]]
      o[i + 6] = map[o[i + 6]]
      o[i + 7] = map[o[i + 7]]
    }

    for (const end = o.length; i < end; i++) o[i] = map[o[i]]
    return o
  }

  mappers.set(encoding, { mapper, incomplete })
  return { mapper, incomplete }
}
|
||||
|
||||
// Returns a cached `(arr, loose) => string` decoder for a single-byte encoding.
// The decoder fast-paths the ASCII prefix, then maps the remaining bytes through
// a lazily-built table of 256 single-char strings. In strict mode (loose=false),
// encodings with unmapped bytes reject input that decodes to U+FFFD.
export function encodingDecoder(encoding) {
  const cached = decoders.get(encoding)
  if (cached) return cached

  const isLatin1 = encoding === 'iso-8859-1'
  if (isLatin1 && !nativeDecoder) {
    // latin1 bytes map 1:1 to code points, no table needed (native decoder is
    // faster for ascii below, hence the condition). Cache the closure like every
    // other path instead of rebuilding it on each encodingDecoder() call.
    const decoder = (arr, loose = false) => decodeLatin1(arr)
    decoders.set(encoding, decoder)
    return decoder
  }

  let strings
  const codes = getEncoding(encoding)
  const incomplete = codes.includes(r) // some bytes have no mapping
  const decoder = (arr, loose = false) => {
    if (!strings) {
      // Lazily build the byte -> single-char-string table: ASCII, then the
      // high half, padded with identity entries up to 256
      const allCodes = Array.from({ length: 128 }, (_, i) => i).concat(codes)
      while (allCodes.length < 256) allCodes.push(allCodes.length)
      strings = allCodes.map((c) => String.fromCharCode(c))
    }

    const prefixLen = asciiPrefix(arr)
    if (prefixLen === arr.length) return decodeAscii(arr)
    if (isLatin1) return decodeLatin1(arr) // TODO: check if decodeAscii with subarray is faster for small prefixes too
    const prefix = decodeLatin1(arr, 0, prefixLen) // TODO: check if decodeAscii with subarray is faster for small prefixes too
    const suffix = decode2string(arr, prefix.length, arr.length, strings)
    if (!loose && incomplete && suffix.includes('\uFFFD')) throw new TypeError(E_STRICT)
    return prefix + suffix
  }

  decoders.set(encoding, decoder)
  return decoder
}
|
||||
|
||||
// Builds (and caches) the inverse table for `encoding`: indexed by code point,
// yields the encoded byte. Indices < 128 map to themselves (ASCII); a mapped
// high byte 0x80+i is stored at its code point. Unmapped code points read as 0.
export function encodeMap(encoding) {
  const cached = encmaps.get(encoding)
  if (cached) return cached

  const codes = getEncoding(encoding)
  let max = 128
  // NOTE(review): pads short tables with identity entries so indices 0-127 of
  // `codes` exist below — presumably only relevant for truncated generated
  // tables; confirm against getEncoding output
  while (codes.length < 128) codes.push(128 + codes.length)
  for (const code of codes) if (code > max && code !== r) max = code
  const map = new Uint8Array(max + 1) // < 10 KiB for all except macintosh, 63 KiB for macintosh
  for (let i = 0; i < 128; i++) {
    map[i] = i
    if (codes[i] !== r) map[codes[i]] = 128 + i
  }

  encmaps.set(encoding, map)
  return map
}
|
||||
266
node_modules/@exodus/bytes/fallback/utf16.js
generated
vendored
Normal file
266
node_modules/@exodus/bytes/fallback/utf16.js
generated
vendored
Normal file
@@ -0,0 +1,266 @@
|
||||
import { decodeUCS2 } from './latin1.js'
|
||||
import { assertU8, E_STRING, E_STRICT_UNICODE } from './_utils.js'
|
||||
import { nativeDecoder, isLE, encodeCharcodes } from './platform.js'
|
||||
|
||||
// Error message for byte sequences that are not valid utf16
export const E_STRICT = 'Input is not well-formed utf16'
// Grabbed at module load; undefined on engines without the ES2024 well-formed string helpers
const isWellFormedStr = /* @__PURE__ */ (() => String.prototype.isWellFormed)()
const toWellFormedStr = /* @__PURE__ */ (() => String.prototype.toWellFormed)()

// U+FFFD, and its byte-swapped form for the swapped-output encoders below
const replacementCodepoint = 0xff_fd
const replacementCodepointSwapped = 0xfd_ff

// Reinterpret a Uint8Array as a Uint16Array over the same bytes
const to16 = (a) => new Uint16Array(a.buffer, a.byteOffset, a.byteLength / 2) // Requires checked length and alignment!
|
||||
|
||||
// Encode a string to utf16 code units in the requested container/byte order.
// Strict mode (loose=false) rejects lone surrogates with E_STRICT_UNICODE.
export function encodeApi(str, loose, format) {
  if (typeof str !== 'string') throw new TypeError(E_STRING)

  const wantsU16 = format === 'uint16'
  if (!wantsU16 && format !== 'uint8-le' && format !== 'uint8-be') {
    throw new TypeError('Unknown format')
  }

  // On v8 and SpiderMonkey, checking via native isWellFormed beats a js loop;
  // when it is unavailable, encode() below validates inline during the copy
  if (!loose && isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT_UNICODE)

  const shouldSwap = isLE ? format === 'uint8-be' : format === 'uint8-le'
  const u16 = encode(str, loose, !loose && isWellFormedStr, shouldSwap)

  // Bytes are already in the requested order and format is already checked,
  // so only the view type changes for byte output
  if (wantsU16) return u16
  return new Uint8Array(u16.buffer, u16.byteOffset, u16.byteLength)
}
|
||||
|
||||
// Native utf-16 decoders (both endiannesses, strict and loose); null when TextDecoder is unavailable
const fatalLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true, fatal: true }) : null
const looseLE = nativeDecoder ? new TextDecoder('utf-16le', { ignoreBOM: true }) : null
const fatalBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true, fatal: true }) : null
const looseBE = nativeDecoder ? new TextDecoder('utf-16be', { ignoreBOM: true }) : null
|
||||
|
||||
// Decode utf16 input via the native TextDecoder. Accepts a Uint16Array
// ('uint16', platform byte order) or an even-length Uint8Array
// ('uint8-le' / 'uint8-be'). Loose mode substitutes U+FFFD, strict mode throws.
export function decodeApiDecoders(input, loose, format) {
  switch (format) {
    case 'uint16':
      if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
      break
    case 'uint8-le':
    case 'uint8-be':
      assertU8(input)
      if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
      break
    default:
      throw new TypeError('Unknown format')
  }

  const le = format === 'uint16' ? isLE : format === 'uint8-le'
  const decoder = le ? (loose ? looseLE : fatalLE) : loose ? looseBE : fatalBE
  return decoder.decode(input)
}
|
||||
|
||||
// JS fallback for utf16 decoding (no native TextDecoder). Validates the input
// container, reinterprets the bytes as u16 code units, decodes, then enforces
// (strict) or repairs (loose) well-formedness with the native string helpers
// when present; otherwise decode() itself checks/repairs per code unit.
export function decodeApiJS(input, loose, format) {
  if (format !== 'uint16' && format !== 'uint8-le' && format !== 'uint8-be') {
    throw new TypeError('Unknown format')
  }

  let u16
  if (format === 'uint16') {
    if (!(input instanceof Uint16Array)) throw new TypeError('Expected an Uint16Array')
    u16 = input
  } else {
    assertU8(input)
    if (input.byteLength % 2 !== 0) throw new TypeError('Expected even number of bytes')
    u16 = to16input(input, format === 'uint8-le')
  }

  // When a native helper exists, skip per-unit checks inside decode() and fix up after
  const str = decode(u16, loose, (!loose && isWellFormedStr) || (loose && toWellFormedStr))
  if (loose) return toWellFormedStr ? toWellFormedStr.call(str) : str
  if (isWellFormedStr && !isWellFormedStr.call(str)) throw new TypeError(E_STRICT)
  return str
}
|
||||
|
||||
// Reinterpret an even-length Uint8Array as a Uint16Array in the given byte
// order. Same-endian aligned input is viewed in place; otherwise the bytes are
// copied (and pair-swapped when the endianness differs). Assumes an even byte count.
export function to16input(u8, le) {
  if (le !== isLE) return to16(swap16(Uint8Array.from(u8)))
  const aligned = u8.byteOffset % 2 === 0
  return to16(aligned ? u8 : Uint8Array.from(u8))
}
|
||||
|
||||
// Convert u16 code units to a string. `checked` skips the well-formedness scan
// (caller guarantees validity or fixes up afterwards). Strict mode throws on
// lone surrogates; loose mode replaces them with U+FFFD in a copy of the input.
export const decode = (u16, loose = false, checked = false) => {
  if (!checked && !isWellFormed(u16)) {
    if (!loose) throw new TypeError(E_STRICT)
    return decodeUCS2(toWellFormed(Uint16Array.from(u16))) // clone — toWellFormed mutates
  }

  return decodeUCS2(u16)
}
|
||||
|
||||
// Encode a string into a fresh Uint16Array of its char codes. `checked` means
// the caller already validated well-formedness; `swapped` stores each unit with
// its bytes exchanged (opposite-endian output). Dispatches to small
// specialized functions, which helps at least on SpiderMonkey.
export function encode(str, loose = false, checked = false, swapped = false) {
  const out = new Uint16Array(str.length)
  if (swapped) {
    return checked ? encodeCheckedSwapped(str, out) : encodeUncheckedSwapped(str, out, loose)
  }

  return checked ? encodeChecked(str, out) : encodeUnchecked(str, out, loose)
}
|
||||
|
||||
/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
|
||||
|
||||
// Byte-swaps each 16-bit pair of u8 in place and returns it.
// Assumes checked length % 2 === 0, otherwise the trailing odd byte is left untouched.
function swap16(u8) {
  const len = u8.length
  let i = 0

  // Two pairs per iteration — unrolling measures faster on the target engines
  for (const stop = len - 3; i < stop; i += 4) {
    const a = u8[i]
    const b = u8[i + 2]
    u8[i] = u8[i + 1]
    u8[i + 1] = a
    u8[i + 2] = u8[i + 3]
    u8[i + 3] = b
  }

  // At most one full pair can remain
  if (i < len - 1) {
    const a = u8[i]
    u8[i] = u8[i + 1]
    u8[i + 1] = a
  }

  return u8
}
|
||||
|
||||
// Splitting paths into small functions helps (at least on SpiderMonkey)

// Checked, native byte order: fill arr with char codes (same as encodeLatin1, but with Uint16Array)
const encodeChecked = (str, arr) => encodeCharcodes(str, arr)
|
||||
|
||||
// Checked path with byte-swapped output: each char code is stored with its
// high and low bytes exchanged (for opposite-endian targets). Returns arr.
function encodeCheckedSwapped(str, arr) {
  // TODO: faster path for Hermes? See encodeCharcodes
  const n = str.length
  let i = 0
  while (i < n) {
    const code = str.charCodeAt(i)
    arr[i++] = ((code & 0xff) << 8) | (code >> 8)
  }

  return arr
}
|
||||
|
||||
// lead: d800 - dbff, trail: dc00 - dfff

// Fills arr with the utf16 code units of str, validating surrogate pairs.
// Strict mode (loose=false) throws on lone surrogates; loose mode writes U+FFFD.
// Returns arr (same length as str — a replaced lone surrogate still takes one slot).
function encodeUnchecked(str, arr, loose = false) {
  // TODO: faster path for Hermes? See encodeCharcodes
  const length = str.length
  for (let i = 0; i < length; i++) {
    const code = str.charCodeAt(i)
    arr[i] = code
    if (code >= 0xd8_00 && code < 0xe0_00) {
      // An unexpected trail or a lead at the very end of input
      if (code > 0xdb_ff || i + 1 >= length) {
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        arr[i] = replacementCodepoint
      } else {
        const next = str.charCodeAt(i + 1) // Process valid pairs immediately
        if (next < 0xdc_00 || next >= 0xe0_00) {
          // Lead not followed by a trail; next stays unconsumed and is revisited
          if (!loose) throw new TypeError(E_STRICT_UNICODE)
          arr[i] = replacementCodepoint
        } else {
          i++ // consume next
          arr[i] = next
        }
      }
    }
  }

  return arr
}
|
||||
|
||||
// Same as encodeUnchecked, but every unit is written with its bytes swapped
// (for opposite-endian output); the replacement char is pre-swapped too.
function encodeUncheckedSwapped(str, arr, loose = false) {
  // TODO: faster path for Hermes? See encodeCharcodes
  const length = str.length
  for (let i = 0; i < length; i++) {
    const code = str.charCodeAt(i)
    arr[i] = ((code & 0xff) << 8) | (code >> 8)
    if (code >= 0xd8_00 && code < 0xe0_00) {
      // An unexpected trail or a lead at the very end of input
      if (code > 0xdb_ff || i + 1 >= length) {
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        arr[i] = replacementCodepointSwapped
      } else {
        const next = str.charCodeAt(i + 1) // Process valid pairs immediately
        if (next < 0xdc_00 || next >= 0xe0_00) {
          // Lead not followed by a trail; next stays unconsumed and is revisited
          if (!loose) throw new TypeError(E_STRICT_UNICODE)
          arr[i] = replacementCodepointSwapped
        } else {
          i++ // consume next
          arr[i] = ((next & 0xff) << 8) | (next >> 8)
        }
      }
    }
  }

  return arr
}
|
||||
|
||||
// Replaces unmatched surrogate halves in u16 with U+FFFD, mutating in place,
// and returns u16. Only needed on Hermes, everything else has native impl.
export function toWellFormed(u16) {
  const n = u16.length
  let i = 0
  while (i < n) {
    const unit = u16[i]
    if (unit < 0xd8_00 || unit >= 0xe0_00) {
      i++
      continue
    }

    if (unit > 0xdb_ff || i + 1 >= n) {
      // A lone trail, or a lead at the very end of input
      u16[i++] = replacementCodepoint
      continue
    }

    const trail = u16[i + 1]
    if (trail >= 0xdc_00 && trail < 0xe0_00) {
      i += 2 // valid pair, keep both units
    } else {
      u16[i++] = replacementCodepoint // unmatched lead; next unit is re-examined
    }
  }

  return u16
}
|
||||
|
||||
// Returns true when u16 contains no unpaired surrogate halves.
// Only needed on Hermes, everything else has native impl.
// Layout: a u32 skip-scan over aligned input jumps past units below 0x8000,
// then a four-wide u16 scan finds the first surrogate-range unit, then a
// per-unit loop validates pairs from there.
export function isWellFormed(u16) {
  const length = u16.length
  const LEAD = 0xd8_00
  const LIMIT = 0xe0_00
  let i = 0

  // u32 fast path: skip ahead while no unit has its high bit set (so no surrogates).
  // Only for aligned views, but almost all input is aligned (pooled Buffer or offset 0)
  if (length > 32 && u16.byteOffset % 4 === 0) {
    const HIGH_BITS = 0x80_00_80_00
    const len32 = (u16.byteLength / 4) | 0
    const u32 = new Uint32Array(u16.buffer, u16.byteOffset, len32)
    // Unrolled; keeping the bound check as the first statement inside the loop
    // measures notably faster, likely due to array access checks
    for (const stop = len32 - 3; ; i += 4) {
      if (i >= stop) break
      if (
        u32[i] & HIGH_BITS ||
        u32[i + 1] & HIGH_BITS ||
        u32[i + 2] & HIGH_BITS ||
        u32[i + 3] & HIGH_BITS
      ) {
        break
      }
    }

    while (i < len32 && !(u32[i] & HIGH_BITS)) i++
    i *= 2 // back to u16 indexing
  }

  // Four-wide scan to the first surrogate-range unit — ~30-40% faster e.g. on
  // English text without surrogates but with other symbols above 0x80_00
  for (const stop = length - 3; ; i += 4) {
    if (i >= stop) break
    const a = u16[i]
    const b = u16[i + 1]
    const c = u16[i + 2]
    const d = u16[i + 3]
    if (
      (a >= LEAD && a < LIMIT) ||
      (b >= LEAD && b < LIMIT) ||
      (c >= LEAD && c < LIMIT) ||
      (d >= LEAD && d < LIMIT)
    ) {
      break
    }
  }

  // Exact validation from the first candidate onwards
  while (i < length) {
    const unit = u16[i++]
    if (unit < LEAD || unit >= LIMIT) continue
    // A trail with no lead before it, or a lead at the very end of input
    if (unit >= 0xdc_00 || i >= length) return false
    const trail = u16[i++] // a valid pair is consumed immediately
    if (trail < 0xdc_00 || trail >= LIMIT) return false
  }

  return true
}
|
||||
2
node_modules/@exodus/bytes/fallback/utf8.auto.browser.js
generated
vendored
Normal file
2
node_modules/@exodus/bytes/fallback/utf8.auto.browser.js
generated
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
// Browser build stubs: no JS fallback implementations are exported here.
// NOTE(review): presumably null signals callers to use the platform-native path — confirm against consumers
export const decodeFast = null
export const encode = null
|
||||
1
node_modules/@exodus/bytes/fallback/utf8.auto.js
generated
vendored
Normal file
1
node_modules/@exodus/bytes/fallback/utf8.auto.js
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
// Re-export the JS fallback implementations for this build variant
export { decodeFast, encode } from './utf8.js'
|
||||
1
node_modules/@exodus/bytes/fallback/utf8.auto.native.js
generated
vendored
Normal file
1
node_modules/@exodus/bytes/fallback/utf8.auto.native.js
generated
vendored
Normal file
@@ -0,0 +1 @@
|
||||
// Re-export the JS fallback implementations for this build variant
export { decodeFast, encode } from './utf8.js'
|
||||
270
node_modules/@exodus/bytes/fallback/utf8.js
generated
vendored
Normal file
270
node_modules/@exodus/bytes/fallback/utf8.js
generated
vendored
Normal file
@@ -0,0 +1,270 @@
|
||||
import { E_STRICT_UNICODE } from './_utils.js'
|
||||
import { isHermes } from './platform.js'
|
||||
import { asciiPrefix, decodeLatin1, encodeAsciiPrefix } from './latin1.js'
|
||||
|
||||
// Error message for byte sequences that are not valid utf8
export const E_STRICT = 'Input is not well-formed utf8'

// U+FFFD REPLACEMENT CHARACTER, emitted for malformed sequences in loose mode
const replacementPoint = 0xff_fd
const shouldUseEscapePath = isHermes // faster only on Hermes, js path beats it on normal engines
// Grab the globals once; either may be absent — decodeFast() checks before using them
const { decodeURIComponent, escape } = globalThis
|
||||
|
||||
// Decode utf8 bytes to a string without a native TextDecoder.
// The ASCII prefix takes a fast path; the remainder goes through the
// escape/decodeURIComponent trick on Hermes (~3x faster there), or through the
// manual decoder otherwise (and for loose-mode error recovery).
export function decodeFast(arr, loose) {
  const asciiLen = asciiPrefix(arr)
  const prefix = decodeLatin1(arr, 0, asciiLen) // No native decoder to use, so decodeAscii is useless here
  if (asciiLen === arr.length) return prefix

  if (shouldUseEscapePath && escape && decodeURIComponent) {
    const escaped = escape(decodeLatin1(arr, asciiLen, arr.length))
    try {
      return prefix + decodeURIComponent(escaped) // Latin1 bytes -> utf8 text
    } catch {
      // Malformed utf8: strict mode rejects here; loose mode falls through to
      // the manual implementation below, which inserts U+FFFD replacements
      if (!loose) throw new TypeError(E_STRICT)
    }
  }

  return prefix + decode(arr, loose, asciiLen)
}
|
||||
|
||||
// https://encoding.spec.whatwg.org/#utf-8-decoder
// We are most likely in loose mode, for non-loose escape & decodeURIComponent solved everything

// Manual utf8 decoder for arr[start, arr.length). Strict mode (loose=false)
// throws E_STRICT on the first malformed sequence; loose mode emits U+FFFD.
// Decoded char codes are batched into `tmp` and flushed via
// String.fromCharCode.apply in chunks, staying far below argument-count limits.
export function decode(arr, loose, start = 0) {
  start |= 0
  const end = arr.length
  let out = ''
  const chunkSize = 0x2_00 // far below MAX_ARGUMENTS_LENGTH in npmjs.com/buffer, we use smaller chunks
  const tmpSize = Math.min(end - start, chunkSize + 1) // need 1 extra slot for last codepoint, which can be 2 charcodes
  const tmp = new Array(tmpSize).fill(0)
  let ti = 0

  for (let i = start; i < end; i++) {
    // Flush the accumulated char codes once per chunk
    if (ti >= chunkSize) {
      tmp.length = ti // can be larger by 1 if last codepoint is two charcodes
      out += String.fromCharCode.apply(String, tmp)
      if (tmp.length <= chunkSize) tmp.push(0) // restore 1 extra slot for last codepoint
      ti = 0
    }

    const byte = arr[i]
    if (byte < 0x80) {
      tmp[ti++] = byte
      // ascii fast path is in decodeFast(), this is called only on non-ascii input
      // so we don't unroll this anymore
    } else if (byte < 0xc2) {
      // Stray continuation byte or overlong lead (0xc0/0xc1)
      if (!loose) throw new TypeError(E_STRICT)
      tmp[ti++] = replacementPoint
    } else if (byte < 0xe0) {
      // need 1 more
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }

      const byte1 = arr[i + 1]
      if (byte1 < 0x80 || byte1 > 0xbf) {
        // Invalid continuation: emit U+FFFD, re-examine byte1 on the next iteration
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }

      i++
      tmp[ti++] = ((byte & 0x1f) << 6) | (byte1 & 0x3f)
    } else if (byte < 0xf0) {
      // need 2 more
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }

      // Tightened bounds per the WHATWG decoder: rejects overlongs (0xe0) and
      // surrogate-range encodings (0xed)
      const lower = byte === 0xe0 ? 0xa0 : 0x80
      const upper = byte === 0xed ? 0x9f : 0xbf
      const byte1 = arr[i + 1]
      if (byte1 < lower || byte1 > upper) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }

      i++
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }

      const byte2 = arr[i + 1]
      if (byte2 < 0x80 || byte2 > 0xbf) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }

      i++
      tmp[ti++] = ((byte & 0xf) << 12) | ((byte1 & 0x3f) << 6) | (byte2 & 0x3f)
    } else if (byte <= 0xf4) {
      // need 3 more
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }

      // Tightened bounds per the WHATWG decoder: rejects overlongs (0xf0) and
      // code points above U+10FFFF (0xf4)
      const lower = byte === 0xf0 ? 0x90 : 0x80
      const upper = byte === 0xf4 ? 0x8f : 0xbf
      const byte1 = arr[i + 1]
      if (byte1 < lower || byte1 > upper) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }

      i++
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }

      const byte2 = arr[i + 1]
      if (byte2 < 0x80 || byte2 > 0xbf) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }

      i++
      if (i + 1 >= end) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        break
      }

      const byte3 = arr[i + 1]
      if (byte3 < 0x80 || byte3 > 0xbf) {
        if (!loose) throw new TypeError(E_STRICT)
        tmp[ti++] = replacementPoint
        continue
      }

      i++
      const codePoint =
        ((byte & 0xf) << 18) | ((byte1 & 0x3f) << 12) | ((byte2 & 0x3f) << 6) | (byte3 & 0x3f)
      if (codePoint > 0xff_ff) {
        // split into char codes as String.fromCharCode is faster than String.fromCodePoint
        const u = codePoint - 0x1_00_00
        tmp[ti++] = 0xd8_00 + ((u >> 10) & 0x3_ff)
        tmp[ti++] = 0xdc_00 + (u & 0x3_ff)
      } else {
        tmp[ti++] = codePoint
      }
      // eslint-disable-next-line sonarjs/no-duplicated-branches
    } else {
      // Lead byte above 0xf4: would encode past U+10FFFF
      if (!loose) throw new TypeError(E_STRICT)
      tmp[ti++] = replacementPoint
    }
  }

  // Flush the final partial chunk
  if (ti === 0) return out
  tmp.length = ti
  return out + String.fromCharCode.apply(String, tmp)
}
|
||||
|
||||
// Encodes `string` to utf8 bytes. Starts with a Uint8Array sized for pure
// ASCII; on the first non-ASCII char it reallocates once to the worst case
// (3 bytes per remaining utf16 unit — a surrogate pair is 2 units -> 4 bytes,
// still within 3x). Strict mode (loose=false) throws E_STRICT_UNICODE on lone
// surrogates; loose mode writes EF BF BD (U+FFFD) instead.
export function encode(string, loose) {
  const length = string.length
  let small = true
  let bytes = new Uint8Array(length) // assume ascii

  let i = encodeAsciiPrefix(bytes, string)
  let p = i
  for (; i < length; i++) {
    let code = string.charCodeAt(i)
    if (code < 0x80) {
      bytes[p++] = code
      // Unroll the loop a bit for faster ops
      while (true) {
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
        i++
        if (i >= length) break
        code = string.charCodeAt(i)
        if (code >= 0x80) break
        bytes[p++] = code
      }

      if (i >= length) break
      // now, code is present and >= 0x80
    }

    // First non-ASCII char: reallocate once to the 3-bytes-per-unit worst case
    if (small) {
      // TODO: use resizable array buffers? will have to return a non-resizeable one
      if (p !== i) /* c8 ignore next */ throw new Error('Unreachable') // Here, p === i (only when small is still true)
      const bytesNew = new Uint8Array(p + (length - i) * 3) // maximium can be 3x of the string length in charcodes
      bytesNew.set(bytes)
      bytes = bytesNew
      small = false
    }

    // surrogate, charcodes = [d800 + a & 3ff, dc00 + b & 3ff]; codePoint = 0x1_00_00 | (a << 10) | b
    // lead: d800 - dbff
    // trail: dc00 - dfff
    if (code >= 0xd8_00 && code < 0xe0_00) {
      // Can't be a valid trail as we already processed that below

      if (code > 0xdb_ff || i + 1 >= length) {
        // An unexpected trail or a lead at the very end of input
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        bytes[p++] = 0xef
        bytes[p++] = 0xbf
        bytes[p++] = 0xbd
        continue
      }

      const next = string.charCodeAt(i + 1) // Process valid pairs immediately
      if (next >= 0xdc_00 && next < 0xe0_00) {
        // here, codePoint is always between 0x1_00_00 and 0x11_00_00, we encode as 4 bytes
        const codePoint = (((code - 0xd8_00) << 10) | (next - 0xdc_00)) + 0x1_00_00
        bytes[p++] = (codePoint >> 18) | 0xf0
        bytes[p++] = ((codePoint >> 12) & 0x3f) | 0x80
        bytes[p++] = ((codePoint >> 6) & 0x3f) | 0x80
        bytes[p++] = (codePoint & 0x3f) | 0x80
        i++ // consume next
      } else {
        // Next is not a trail, leave next unconsumed but process unmatched lead error
        if (!loose) throw new TypeError(E_STRICT_UNICODE)
        bytes[p++] = 0xef
        bytes[p++] = 0xbf
        bytes[p++] = 0xbd
      }

      continue
    }

    // We are left with a non-pair char code above ascii, it gets encoded to 2 or 3 bytes
    if (code < 0x8_00) {
      bytes[p++] = (code >> 6) | 0xc0
      bytes[p++] = (code & 0x3f) | 0x80
    } else {
      bytes[p++] = (code >> 12) | 0xe0
      bytes[p++] = ((code >> 6) & 0x3f) | 0x80
      bytes[p++] = (code & 0x3f) | 0x80
    }
  }

  // Trim to the bytes actually written (pure-ASCII input returns the exact-size buffer)
  return bytes.length === p ? bytes : bytes.slice(0, p)
}
|
||||
Reference in New Issue
Block a user