From b5d156317464088ce6e6311d0247f6a74e4b3fd2 Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda <chalkerx@gmail.com>
Date: Sat, 10 Jan 2026 06:52:22 +0400
Subject: [PATCH 1/3] feat: add multi-byte encoders except iso-2022-jp

---
 fallback/multi-byte.js          | 290 +++++++++++++++++++++++++++++++-
 fallback/multi-byte.table.js    |   1 +
 multi-byte.js                   |  13 +-
 multi-byte.node.js              |  14 +-
 tests/multi-byte.encode.test.js | 264 +++++++++++++++++++++++++++++
 tests/wpt/loader.cjs            |  16 ++
 6 files changed, 593 insertions(+), 5 deletions(-)
 create mode 100644 tests/multi-byte.encode.test.js

diff --git a/fallback/multi-byte.js b/fallback/multi-byte.js
index 847d3a1..d829782 100644
--- a/fallback/multi-byte.js
+++ b/fallback/multi-byte.js
@@ -1,9 +1,9 @@
-import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js'
+import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js'
 import { getTable } from './multi-byte.table.js'
 
 export const E_STRICT = 'Input is not well-formed for this encoding'
 
-// TODO: optimize
+/* Decoders */
 
 // If the decoder is not cleared properly, state can be preserved between non-streaming calls!
 // See comment about fatal stream
@@ -504,3 +504,289 @@ export function multibyteDecoder(enc, loose = false) {
     return res + mapper.decode(arr, res.length, arr.length, stream)
   }
 }
+
+/* Encoders */
+
+// TODO: optimize, check memory usage?
+// TODO: just precalculate all bytes and store offsets in one large u8?
+
+const e7 = new Map([[148, 236], [149, 237], [150, 243]]) // prettier-ignore
+const e8 = new Map([[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]]) // prettier-ignore
+const maps = new Map()
+
+// We accept that encoders use non-trivial amount of mem, for perf
+// most are are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
+function getMap(id, name = id) {
+  const cached = maps.get(id)
+  if (cached) return cached
+  const table = getTable(name)
+  const map = new Uint16Array(id === 'big5' ? 0x2_f8_a7 : 0xff_e7) // max codepoint in table + 1
+  for (let i = 0; i < table.length; i++) {
+    const c = table[i]
+    if (c === REP || c === undefined) continue
+    if (id === 'big5') {
+      if (i < 5024) continue // this also skips multi-codepoint strings
+      // In big5, every code point keeps its first pointer, except these six, which take their last
+      if (
+        map[c] &&
+        c !== 0x25_50 &&
+        c !== 0x25_5e &&
+        c !== 0x25_61 &&
+        c !== 0x25_6a &&
+        c !== 0x53_41 &&
+        c !== 0x53_45
+      ) {
+        continue
+      }
+    } else {
+      if (id === 'shift_jis' && i >= 8272 && i <= 8835) continue
+      if (map[c]) continue
+    }
+
+    if (typeof c === 'string') {
+      // always a single codepoint here
+      map[c.codePointAt(0)] = 1 + i
+    } else if (c !== REP) {
+      map[c] = 1 + i
+    }
+  }
+
+  if (id === 'shift_jis' || id === 'euc-jp') map[0x22_12] = map[0xff_0d]
+  maps.set(id, map)
+  return map
+}
+
+/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
+
+const encoders = {
+  big5: (err) => {
+    const map = getMap('big5')
+    const encode = (u8, i, cp) => {
+      let p = map[cp]
+      if (!p) return err(cp)
+      p--
+      const t = p % 157
+      u8[i] = 0x81 + ((p / 157) | 0)
+      u8[i + 1] = (t < 0x3f ? 0x40 : 0x62) + t
+      return 2
+    }
+
+    return { encode, ascii: 0x80 }
+  },
+  'euc-kr': (err) => {
+    const map = getMap('euc-kr')
+    const encode = (u8, i, cp) => {
+      let p = map[cp]
+      if (!p) return err(cp)
+      p--
+      u8[i] = 0x81 + ((p / 190) | 0)
+      u8[i + 1] = (p % 190) + 0x41
+      return 2
+    }
+
+    return { encode, ascii: 0x80 }
+  },
+  'euc-jp': (err) => {
+    const map = getMap('euc-jp', 'jis0208')
+    const encode = (u8, i, cp) => {
+      if (cp === 0xa5) {
+        u8[i] = 0x5c
+        return 1
+      }
+
+      if (cp === 0x20_3e) {
+        u8[i] = 0x7e
+        return 1
+      }
+
+      if (cp >= 0xff_61 && cp <= 0xff_9f) {
+        u8[i] = 0x8e
+        u8[i + 1] = cp - 0xfe_c0
+        return 2
+      }
+
+      let p = map[cp]
+      if (!p) return err(cp)
+      p--
+      u8[i] = ((p / 94) | 0) + 0xa1
+      u8[i + 1] = (p % 94) + 0xa1
+      return 2
+    }
+
+    return { encode, ascii: 0x80 }
+  },
+  shift_jis: (err) => {
+    const map = getMap('shift_jis', 'jis0208')
+    const encode = (u8, i, cp) => {
+      if (cp === 0xa5) {
+        u8[i] = 0x5c
+        return 1
+      }
+
+      if (cp === 0x20_3e) {
+        u8[i] = 0x7e
+        return 1
+      }
+
+      if (cp >= 0xff_61 && cp <= 0xff_9f) {
+        u8[i] = cp - 0xfe_c0
+        return 1
+      }
+
+      let p = map[cp]
+      if (!p) return err(cp)
+      p--
+      const l = (p / 188) | 0
+      const t = p % 188
+      u8[i] = (l < 0x1f ? 0x81 : 0xc1) + l
+      u8[i + 1] = (t < 0x3f ? 0x40 : 0x41) + t
+      return 2
+    }
+
+    return { encode, ascii: 0x81 }
+  },
+  gbk: (err) => {
+    const map = getMap('gb18030')
+
+    const encode = (u8, i, cp) => {
+      if (cp === 0xe5_e5) return err(cp)
+      if (cp === 0x20_ac) {
+        u8[i] = 0x80
+        return 1
+      }
+
+      if (cp >= 0xe7_8d && cp <= 0xe8_64) {
+        if (cp <= 0xe7_93) {
+          u8[i] = 0xa6
+          u8[i + 1] = cp - 0xe6_b4
+          return 2
+        }
+
+        const l = cp < 0xe8_00 ? 0xa6 : 0xfe
+        const t = (l === 0xa6 ? e7 : e8).get(cp & 0xff)
+        if (t) {
+          u8[i] = l
+          u8[i + 1] = t
+          return 2
+        }
+      }
+
+      let p = map[cp]
+      if (p) {
+        p--
+        const t = p % 190
+        u8[i] = 0x81 + ((p / 190) | 0)
+        u8[i + 1] = (t < 0x3f ? 0x40 : 0x41) + t
+        return 2
+      }
+
+      return err(cp)
+    }
+
+    return { encode, ascii: 0x80, width: 2 }
+  },
+  gb18030: (err) => {
+    const map = getMap('gb18030')
+    const gb18030r = getTable('gb18030-ranges')
+
+    const encode = (u8, i, cp) => {
+      if (cp === 0xe5_e5) return err(cp)
+      if (cp >= 0xe7_8d && cp <= 0xe8_64) {
+        if (cp <= 0xe7_93) {
+          u8[i] = 0xa6
+          u8[i + 1] = cp - 0xe6_b4
+          return 2
+        }
+
+        const l = cp < 0xe8_00 ? 0xa6 : 0xfe
+        const t = (l === 0xa6 ? e7 : e8).get(cp & 0xff)
+        if (t) {
+          u8[i] = l
+          u8[i + 1] = t
+          return 2
+        }
+      }
+
+      let p = map[cp]
+      if (p) {
+        p--
+        const t = p % 190
+        u8[i] = 0x81 + ((p / 190) | 0)
+        u8[i + 1] = (t < 0x3f ? 0x40 : 0x41) + t
+        return 2
+      }
+
+      let a = 0, b = 0 // prettier-ignore
+      for (const [c, d] of gb18030r) {
+        if (d > cp) break
+        a = c
+        b = d
+      }
+
+      let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
+      u8[i] = 0x81 + ((rp / 12_600) | 0)
+      rp %= 12_600
+      u8[i + 1] = 0x30 + ((rp / 1260) | 0)
+      rp %= 1260
+      u8[i + 2] = 0x81 + ((rp / 10) | 0)
+      u8[i + 3] = 0x30 + (rp % 10)
+      return 4
+    }
+
+    return { encode, ascii: 0x80, width: 4 }
+  },
+}
+
+/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
+
+const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
+
+export function multibyteEncoder(enc) {
+  if (!Object.hasOwn(encoders, enc)) throw new RangeError('Unsupported encoding')
+
+  // eslint-disable-next-line unicorn/consistent-function-scoping
+  const onErr = (code) => {
+    throw new TypeError(E_STRICT)
+  }
+
+  const { encode, ascii, width = 2 } = encoders[enc](onErr)
+  return (str) => {
+    if (!NON_LATIN.test(str)) {
+      try {
+        return encodeAscii(str, E_STRICT)
+      } catch {}
+    }
+
+    const length = str.length
+    const u8 = new Uint8Array(length * width)
+    let i = 0
+    while (i < length) {
+      const x0 = str.charCodeAt(i)
+      if (x0 >= 128) break
+      u8[i++] = x0
+    }
+
+    for (let j = i; j < length; j++) {
+      const x0 = str.charCodeAt(j)
+      if (x0 < ascii) {
+        u8[i++] = x0
+      } else if (x0 >= 0xd8_00 && x0 < 0xe0_00) {
+        if (x0 >= 0xdc_00 || j + 1 === length) {
+          onErr(x0) // Lone surrogate, TODO: how to handle this in non-strict?
+        } else {
+          const x1 = str.charCodeAt(j + 1)
+          if (x1 < 0xdc_00 || x1 > 0xe0_00) {
+            onErr(x0) // Lone surrogate, TODO: how to handle this in non-strict?
+          } else {
+            j++ // consume x1
+            i += encode(u8, i, 0x1_00_00 + ((x1 & 0x3_ff) | ((x0 & 0x3_ff) << 10)))
+          }
+        }
+      } else {
+        i += encode(u8, i, x0)
+      }
+    }
+
+    return i === u8.length ? u8 : u8.subarray(0, i)
+  }
+}
diff --git a/fallback/multi-byte.table.js b/fallback/multi-byte.table.js
index 1391eb0..967ff68 100644
--- a/fallback/multi-byte.table.js
+++ b/fallback/multi-byte.table.js
@@ -104,6 +104,7 @@ export function getTable(id) {
     res = new Array(sizes[id]) // array of strings or undefined
     unwrap(res, indices[id], 0, true)
     // Pointer code updates are embedded into the table
+    // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
     res[1133] = '\xCA\u0304'
     res[1135] = '\xCA\u030C'
     res[1164] = '\xEA\u0304'
diff --git a/multi-byte.js b/multi-byte.js
index a6ebd19..fc485ec 100644
--- a/multi-byte.js
+++ b/multi-byte.js
@@ -1,5 +1,6 @@
 import { assertUint8 } from './assert.js'
-import { multibyteDecoder } from './fallback/multi-byte.js'
+import { E_STRING } from './fallback/_utils.js'
+import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
 
 export function createMultibyteDecoder(encoding, loose = false) {
   const jsDecoder = multibyteDecoder(encoding, loose) // asserts
@@ -11,3 +12,13 @@ export function createMultibyteDecoder(encoding, loose = false) {
     return jsDecoder(arr, stream)
   }
 }
+
+export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) {
+  // TODO: replacement, truncate (replacement will need varying length)
+  if (mode !== 'fatal') throw new Error('Unsupported mode')
+  const jsEncoder = multibyteEncoder(encoding) // asserts
+  return (s) => {
+    if (typeof s !== 'string') throw new TypeError(E_STRING)
+    return jsEncoder(s)
+  }
+}
diff --git a/multi-byte.node.js b/multi-byte.node.js
index 1cdb28d..f6e2bd5 100644
--- a/multi-byte.node.js
+++ b/multi-byte.node.js
@@ -1,6 +1,6 @@
 import { assertUint8 } from './assert.js'
-import { isDeno, toBuf } from './fallback/_utils.js'
-import { isAsciiSuperset, multibyteDecoder } from './fallback/multi-byte.js'
+import { isDeno, toBuf, E_STRING } from './fallback/_utils.js'
+import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
 import { isAscii } from 'node:buffer'
 
 export function createMultibyteDecoder(encoding, loose = false) {
@@ -21,3 +21,13 @@ export function createMultibyteDecoder(encoding, loose = false) {
     return jsDecoder(arr, stream)
   }
 }
+
+export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) {
+  // TODO: replacement, truncate (replacement will need varying length)
+  if (mode !== 'fatal') throw new Error('Unsupported mode')
+  const jsEncoder = multibyteEncoder(encoding) // asserts
+  return (s) => {
+    if (typeof s !== 'string') throw new TypeError(E_STRING)
+    return jsEncoder(s)
+  }
+}
diff --git a/tests/multi-byte.encode.test.js b/tests/multi-byte.encode.test.js
new file mode 100644
index 0000000..9b930dc
--- /dev/null
+++ b/tests/multi-byte.encode.test.js
@@ -0,0 +1,264 @@
+import { createMultibyteDecoder, createMultibyteEncoder } from '@exodus/bytes/multi-byte.js'
+import { test, describe } from 'node:test'
+import { readFileSync } from 'node:fs'
+import { join } from 'node:path'
+
+describe('multi-byte encodings are supersets of ascii', () => {
+  // Except iso-2022-jp
+  for (const encoding of ['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030']) {
+    test(encoding, (t) => {
+      const decoder = createMultibyteDecoder(encoding)
+      const encoder = createMultibyteEncoder(encoding)
+      for (let i = 0; i < 128; i++) {
+        let str
+        try {
+          str = decoder(Uint8Array.of(i))
+        } catch (cause) {
+          throw new Error(`Error decoding ${i} in ${encoding}`, { cause })
+        }
+
+        t.assert.strictEqual(str.length, 1, i)
+        t.assert.strictEqual(str.codePointAt(0), i, i)
+
+        t.assert.deepStrictEqual(encoder(str), Uint8Array.of(i))
+      }
+    })
+  }
+})
+
+// https://encoding.spec.whatwg.org/#gb18030-encoder step 5
+const gbExceptions = {
+  E78D: Uint8Array.of(0xa6, 0xd9),
+  E78E: Uint8Array.of(0xa6, 0xda),
+  E78F: Uint8Array.of(0xa6, 0xdb),
+  E790: Uint8Array.of(0xa6, 0xdc),
+  E791: Uint8Array.of(0xa6, 0xdd),
+  E792: Uint8Array.of(0xa6, 0xde),
+  E793: Uint8Array.of(0xa6, 0xdf),
+  E794: Uint8Array.of(0xa6, 0xec),
+  E795: Uint8Array.of(0xa6, 0xed),
+  E796: Uint8Array.of(0xa6, 0xf3),
+  E81E: Uint8Array.of(0xfe, 0x59),
+  E826: Uint8Array.of(0xfe, 0x61),
+  E82B: Uint8Array.of(0xfe, 0x66),
+  E82C: Uint8Array.of(0xfe, 0x67),
+  E832: Uint8Array.of(0xfe, 0x6d),
+  E843: Uint8Array.of(0xfe, 0x7e),
+  E854: Uint8Array.of(0xfe, 0x90),
+  E864: Uint8Array.of(0xfe, 0xa0),
+}
+
+describe('specific tests', () => {
+  test('big5', (t) => {
+    const enc = createMultibyteEncoder('big5')
+    const dec = createMultibyteDecoder('big5')
+
+    // https://encoding.spec.whatwg.org/#index-big5-pointer
+    // If codePoint is U+2550 (═), U+255E (╞), U+2561 (╡), U+256A (╪), U+5341 (十), or U+5345 (卅),
+    // then return the last pointer corresponding to codePoint in index.
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xa4)), String.fromCodePoint(0x25_50)) // 5247
+    t.assert.strictEqual(dec(Uint8Array.of(0xf9, 0xf9)), String.fromCodePoint(0x25_50)) // 18991
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x25_50)), Uint8Array.of(0xf9, 0xf9)) // 18991
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xa5)), String.fromCodePoint(0x25_5e)) // 5248
+    t.assert.strictEqual(dec(Uint8Array.of(0xf9, 0xe9)), String.fromCodePoint(0x25_5e)) // 18975
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x25_5e)), Uint8Array.of(0xf9, 0xe9)) // 18975
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xa7)), String.fromCodePoint(0x25_61)) // 5250
+    t.assert.strictEqual(dec(Uint8Array.of(0xf9, 0xeb)), String.fromCodePoint(0x25_61)) // 18977
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x25_61)), Uint8Array.of(0xf9, 0xeb)) // 18977
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xa6)), String.fromCodePoint(0x25_6a)) // 5249
+    t.assert.strictEqual(dec(Uint8Array.of(0xf9, 0xea)), String.fromCodePoint(0x25_6a)) // 18976
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x25_6a)), Uint8Array.of(0xf9, 0xea)) // 18976
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xcc)), String.fromCodePoint(0x53_41)) // 5287
+    t.assert.strictEqual(dec(Uint8Array.of(0xa4, 0x51)), String.fromCodePoint(0x53_41)) // 5512
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x53_41)), Uint8Array.of(0xa4, 0x51)) // 5512
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xce)), String.fromCodePoint(0x53_45)) // 5289
+    t.assert.strictEqual(dec(Uint8Array.of(0xa4, 0xca)), String.fromCodePoint(0x53_45)) // 5599
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x53_45)), Uint8Array.of(0xa4, 0xca)) // 5599
+
+    // But not others, which return the first pointer in index
+    t.assert.strictEqual(dec(Uint8Array.of(0xa1, 0xb2)), String.fromCodePoint(0x30_03)) // 5104
+    t.assert.strictEqual(dec(Uint8Array.of(0xc6, 0xde)), String.fromCodePoint(0x30_03)) // 10957
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x30_03)), Uint8Array.of(0xa1, 0xb2)) // 5104
+
+    t.assert.strictEqual(dec(Uint8Array.of(0xa2, 0xcd)), String.fromCodePoint(0x53_44)) // 5288
+    t.assert.strictEqual(dec(Uint8Array.of(0xfa, 0xc5)), String.fromCodePoint(0x53_44)) // 19096
+    t.assert.deepStrictEqual(enc(String.fromCodePoint(0x53_44)), Uint8Array.of(0xa2, 0xcd)) // 5288
+  })
+
+  test('shift_jis', (t) => {
+    const enc = createMultibyteEncoder('shift_jis')
+    const dec = createMultibyteDecoder('shift_jis')
+
+    // https://encoding.spec.whatwg.org/#shift_jis-encoder
+    t.assert.deepStrictEqual(enc('\u007F'), Uint8Array.of(0x7f))
+    t.assert.deepStrictEqual(enc('\u0080'), Uint8Array.of(0x80)) // If codePoint is an ASCII code point or U+0080, then return a byte whose value is codePoint.
+    t.assert.deepStrictEqual(enc('\u00A5'), Uint8Array.of(0x5c)) // If codePoint is U+00A5 (¥), then return byte 0x5C.
+    t.assert.deepStrictEqual(enc('\u203E'), Uint8Array.of(0x7e)) // If codePoint is U+203E (‾), then return byte 0x7E.
+    t.assert.deepStrictEqual(enc('\u2212'), enc('\uFF0D')) // If codePoint is U+2212 (−), then set it to U+FF0D (－).
+    t.assert.strictEqual(dec(enc('\uFF0D')), '\uFF0D')
+    t.assert.strictEqual(dec(enc('\u2212')), '\uFF0D')
+
+    for (let i = 0xff_61; i <= 0xff_9f; i++) {
+      const str = String.fromCodePoint(i)
+      t.assert.deepStrictEqual(enc(str), Uint8Array.of(i - 0xff_61 + 0xa1))
+      t.assert.strictEqual(dec(enc(str)), str)
+    }
+  })
+
+  test('euc-jp', (t) => {
+    const enc = createMultibyteEncoder('euc-jp')
+    const dec = createMultibyteDecoder('euc-jp')
+
+    // https://encoding.spec.whatwg.org/#euc-jp-encoder
+    t.assert.deepStrictEqual(enc('\u007F'), Uint8Array.of(0x7f))
+    t.assert.throws(() => enc('\u0080'))
+    t.assert.deepStrictEqual(enc('\u00A5'), Uint8Array.of(0x5c)) // If codePoint is U+00A5 (¥), then return byte 0x5C.
+    t.assert.deepStrictEqual(enc('\u203E'), Uint8Array.of(0x7e)) // If codePoint is U+203E (‾), then return byte 0x7E.
+    t.assert.deepStrictEqual(enc('\u2212'), enc('\uFF0D')) // If codePoint is U+2212 (−), then set it to U+FF0D (－).
+    t.assert.strictEqual(dec(enc('\uFF0D')), '\uFF0D')
+    t.assert.strictEqual(dec(enc('\u2212')), '\uFF0D')
+    for (let i = 0xff_61; i <= 0xff_9f; i++) {
+      const str = String.fromCodePoint(i)
+      t.assert.deepStrictEqual(enc(str), Uint8Array.of(0x8e, i - 0xff_61 + 0xa1))
+      t.assert.strictEqual(dec(enc(str)), str)
+    }
+  })
+
+  test('euc-kr', (t) => {
+    const enc = createMultibyteEncoder('euc-kr')
+
+    // https://encoding.spec.whatwg.org/#euc-kr-encoder
+    t.assert.deepStrictEqual(enc('\u007F'), Uint8Array.of(0x7f))
+    t.assert.throws(() => enc('\u0080'))
+  })
+
+  test('gb18030, gbk', (t) => {
+    // gb18030 can encode replacement
+    t.assert.throws(() => createMultibyteEncoder('gbk')('\uFFFD')) // gbk can't encode it
+    const rep = createMultibyteEncoder('gb18030')('\uFFFD')
+    t.assert.strictEqual(createMultibyteDecoder('gb18030')(rep), '\uFFFD')
+    t.assert.deepStrictEqual(rep, Uint8Array.of(0x84, 0x31, 0xa4, 0x37)) // pointer 39417, valid representation for the replacement char
+
+    // https://encoding.spec.whatwg.org/#gb18030-encoder
+    // 3. If codePoint is U+E5E5, then return error with codePoint.
+    t.assert.throws(() => createMultibyteEncoder('gbk')('\uE5E5')) // not present in index so doesn't need special handling
+    t.assert.throws(() => createMultibyteEncoder('gb18030')('\uE5E5')) // excluded from ranges via a specific check
+
+    // gbk and gb18030 encode U+20AC differently, but decode both variants
+    // https://encoding.spec.whatwg.org/#gb18030-encoder
+    // 4. If is GBK is true and codePoint is U+20AC (€), then return byte 0x80.
+    t.assert.deepStrictEqual(createMultibyteEncoder('gb18030')('\u20AC'), Uint8Array.of(0xa2, 0xe3))
+    t.assert.deepStrictEqual(createMultibyteEncoder('gbk')('\u20AC'), Uint8Array.of(0x80))
+    t.assert.strictEqual(createMultibyteDecoder('gb18030')(Uint8Array.of(0xa2, 0xe3)), '\u20AC')
+    t.assert.strictEqual(createMultibyteDecoder('gb18030')(Uint8Array.of(0x80)), '\u20AC')
+    t.assert.strictEqual(createMultibyteDecoder('gbk')(Uint8Array.of(0xa2, 0xe3)), '\u20AC')
+    t.assert.strictEqual(createMultibyteDecoder('gbk')(Uint8Array.of(0x80)), '\u20AC')
+
+    for (const encoding of ['gb18030', 'gbk']) {
+      const enc = createMultibyteEncoder(encoding)
+      for (const [hex, u8] of Object.entries(gbExceptions)) {
+        t.assert.doesNotThrow(
+          () => t.assert.deepStrictEqual(enc(String.fromCodePoint(parseInt(hex, 16))), u8),
+          `${encoding}(U+${hex})`
+        )
+      }
+    }
+  })
+})
+
+function loadTable(encoding, t) {
+  const text = readFileSync(
+    join(import.meta.dirname, 'encoding/fixtures/multi-byte', `index-${encoding}.txt`),
+    'utf8'
+  )
+
+  const rows = text
+    .split('\n')
+    .map((x) => x.trim())
+    .filter((x) => x && x[0] !== '#')
+    .map((x) => x.split('\t'))
+    .map(([istr, codeHex, description]) => {
+      const i = Number(istr)
+      const code = parseInt(codeHex.slice(2), 16)
+      t.assert.strictEqual(`${i}`, istr)
+      t.assert.strictEqual('0x' + code.toString(16).padStart(4, '0').toUpperCase(), codeHex)
+      return { i, code, description }
+    })
+
+  t.assert.strictEqual(rows.length, new Set(rows.map((row) => row.i)).size) // all unique
+  return rows
+}
+
+describe('roundtrip, tables', () => {
+  const encodings = {
+    big5: 'big5',
+    shift_jis: 'jis0208',
+    'euc-jp': 'jis0208',
+    'euc-kr': 'euc-kr',
+    gbk: 'gb18030',
+    gb18030: 'gb18030',
+  }
+
+  for (const [encoding, tableID] of Object.entries(encodings)) {
+    test(encoding, (t) => {
+      const enc = createMultibyteEncoder(encoding)
+      const dec = createMultibyteDecoder(encoding)
+      const table = loadTable(tableID, t)
+      const last = new Map(table.map(({ i, code }) => [code, i]))
+      for (const { i, code, description } of table) {
+        const str = String.fromCodePoint(code)
+
+        // https://encoding.spec.whatwg.org/#index-big5-pointer excludes low pointers
+        if (encoding === 'big5' && i < (0xa1 - 0x81) * 157) {
+          // If last seen with that code is in low pointer range, it should throw
+          if (last.get(code) === i) t.assert.throws(() => enc(str), description)
+          continue
+        }
+
+        t.assert.doesNotThrow(() => t.assert.strictEqual(dec(enc(str)), str), description)
+      }
+    })
+  }
+})
+
+describe('roundtrip, full Unicode', () => {
+  const MAX = 0x10_ff_ff // Max Unicode codepoint
+
+  test('gb18030', { timeout: 60_000 }, (t) => {
+    const enc = createMultibyteEncoder('gb18030')
+    const dec = createMultibyteDecoder('gb18030')
+
+    for (let i = 0; i <= MAX; i++) {
+      const s = String.fromCodePoint(i)
+      const id = `U+${i.toString(16).toUpperCase()}`
+      if (i >= 0xd8_00 && i <= 0xdf_ff) {
+        // Surrogates
+        t.assert.throws(() => enc(s), `Surrogate ${id}`)
+        continue
+      }
+
+      // https://encoding.spec.whatwg.org/#gb18030-encoder step 3. If codePoint is U+E5E5, then return error with codePoint.
+      if (i === 0xe5_e5) {
+        t.assert.throws(() => enc(s), id)
+        continue
+      }
+
+      let u8
+      t.assert.doesNotThrow(() => {
+        u8 = enc(s)
+      }, id)
+
+      if (Object.hasOwn(gbExceptions, i.toString(16).toUpperCase())) {
+        t.assert.deepStrictEqual(u8, gbExceptions[i.toString(16).toUpperCase()], id)
+      } else {
+        t.assert.strictEqual(dec(u8), s, id)
+      }
+    }
+  })
+})
diff --git a/tests/wpt/loader.cjs b/tests/wpt/loader.cjs
index 36d94e0..e8271ba 100644
--- a/tests/wpt/loader.cjs
+++ b/tests/wpt/loader.cjs
@@ -2,6 +2,7 @@ const assert = require('node:assert/strict')
 const fs = require('node:fs')
 const path = require('node:path')
 const { describe, test } = require('node:test')
+const { createMultibyteEncoder } = require('@exodus/bytes/multi-byte.js')
 
 // TextDecoderStream / TextEncoderStream implementations expect Streams to be present
 if (!globalThis.ReadableStream) {
@@ -146,6 +147,8 @@ function loadTextDecoderHtml(fullName) {
     assert.ok(encoding && encoding.length > 0)
     const decoder = new globalThis.TextDecoder(encoding)
     const fatal = new globalThis.TextDecoder(encoding, { fatal: true })
+    const encode =
+      decoder.encoding === 'iso-2022-jp' ? null : createMultibyteEncoder(decoder.encoding) // TODO: iso-2022-jp
 
     if (fullName.endsWith('_errors.html')) {
       const sep0 = '<span>'
@@ -216,6 +219,19 @@ function loadTextDecoderHtml(fullName) {
           t.assert.strictEqual(fatal.decode(bytes), expected, `${bytesHex} => U+${cpHex}`)
         }
 
+        // Test encoder
+        // This is limited, encoders are asymmetrical
+        if (
+          !(decoder.encoding === 'euc-jp' && bytes.length === 3) && // no jis0212 encoding in spec
+          !(decoder.encoding === 'big5' && bytes[0] > 0x7f && bytes[0] <= 0xa0) && // encoding excludes pointers less than (0xA1 - 0x81) × 157.
+          decoder.encoding !== 'iso-2022-jp' // Not implemented yet
+        ) {
+          t.assert.doesNotThrow(
+            () => t.assert.deepEqual(encode(String.fromCodePoint(cp)), bytes),
+            `encode U+${cpHex} => ${bytesHex}`
+          )
+        }
+
         tested++
       }
 

From 79cf7bbaf0eac8fe6ad888d2390cb99cae2a2074 Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda <chalkerx@gmail.com>
Date: Sun, 11 Jan 2026 10:59:02 +0400
Subject: [PATCH 2/3] perf: improve multi-byte encoders perf

---
 fallback/multi-byte.js | 368 +++++++++++++++++------------------------
 multi-byte.js          |   7 +-
 multi-byte.node.js     |   8 +-
 3 files changed, 151 insertions(+), 232 deletions(-)

diff --git a/fallback/multi-byte.js b/fallback/multi-byte.js
index d829782..e01bc11 100644
--- a/fallback/multi-byte.js
+++ b/fallback/multi-byte.js
@@ -1,3 +1,4 @@
+import { E_STRING } from './_utils.js'
 import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js'
 import { getTable } from './multi-byte.table.js'
 
@@ -507,20 +508,36 @@ export function multibyteDecoder(enc, loose = false) {
 
 /* Encoders */
 
-// TODO: optimize, check memory usage?
-// TODO: just precalculate all bytes and store offsets in one large u8?
-
-const e7 = new Map([[148, 236], [149, 237], [150, 243]]) // prettier-ignore
-const e8 = new Map([[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]]) // prettier-ignore
 const maps = new Map()
+const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore
+const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore
+const preencoders = {
+  __proto__: null,
+  big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)),
+  shift_jis: (p) => {
+    const l = (p / 188) | 0
+    const t = p % 188
+    return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
+  },
+  'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
+  'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
+  gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
+}
+
+preencoders.gbk = preencoders.gb18030
 
 // We accept that encoders use non-trivial amount of mem, for perf
 // most are are 128 KiB mem, big5 is 380 KiB, lazy-loaded at first use
-function getMap(id, name = id) {
+function getMap(id, size) {
   const cached = maps.get(id)
   if (cached) return cached
-  const table = getTable(name)
-  const map = new Uint16Array(id === 'big5' ? 0x2_f8_a7 : 0xff_e7) // max codepoint in table + 1
+  let tname = id
+  const sjis = id === 'shift_jis'
+  if (id === 'gbk') tname = 'gb18030'
+  if (id === 'euc-jp' || sjis) tname = 'jis0208'
+  const table = getTable(tname)
+  const map = new Uint16Array(size)
+  const enc = preencoders[id] || ((p) => p + 1)
   for (let i = 0; i < table.length; i++) {
     const c = table[i]
     if (c === REP || c === undefined) continue
@@ -539,218 +556,50 @@ function getMap(id, name = id) {
         continue
       }
     } else {
-      if (id === 'shift_jis' && i >= 8272 && i <= 8835) continue
+      if (sjis && i >= 8272 && i <= 8835) continue
       if (map[c]) continue
     }
 
     if (typeof c === 'string') {
       // always a single codepoint here
-      map[c.codePointAt(0)] = 1 + i
+      map[c.codePointAt(0)] = enc(i)
     } else if (c !== REP) {
-      map[c] = 1 + i
+      map[c] = enc(i)
     }
   }
 
-  if (id === 'shift_jis' || id === 'euc-jp') map[0x22_12] = map[0xff_0d]
+  for (let i = 0; i < 0x80; i++) map[i] = i
+  if (sjis || id === 'euc-jp') {
+    if (sjis) map[0x80] = 0x80
+    const d = sjis ? 0xfe_c0 : 0x70_c0
+    for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d
+    map[0x22_12] = map[0xff_0d]
+    map[0xa5] = 0x5c
+    map[0x20_3e] = 0x7e
+  } else if (tname === 'gb18030') {
+    if (id === 'gbk') map[0x20_ac] = 0x80
+    for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4
+    for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b
+    for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b
+  }
+
   maps.set(id, map)
   return map
 }
 
-/* eslint-disable @exodus/mutable/no-param-reassign-prop-only */
-
-const encoders = {
-  big5: (err) => {
-    const map = getMap('big5')
-    const encode = (u8, i, cp) => {
-      let p = map[cp]
-      if (!p) return err(cp)
-      p--
-      const t = p % 157
-      u8[i] = 0x81 + ((p / 157) | 0)
-      u8[i + 1] = (t < 0x3f ? 0x40 : 0x62) + t
-      return 2
-    }
-
-    return { encode, ascii: 0x80 }
-  },
-  'euc-kr': (err) => {
-    const map = getMap('euc-kr')
-    const encode = (u8, i, cp) => {
-      let p = map[cp]
-      if (!p) return err(cp)
-      p--
-      u8[i] = 0x81 + ((p / 190) | 0)
-      u8[i + 1] = (p % 190) + 0x41
-      return 2
-    }
-
-    return { encode, ascii: 0x80 }
-  },
-  'euc-jp': (err) => {
-    const map = getMap('euc-jp', 'jis0208')
-    const encode = (u8, i, cp) => {
-      if (cp === 0xa5) {
-        u8[i] = 0x5c
-        return 1
-      }
-
-      if (cp === 0x20_3e) {
-        u8[i] = 0x7e
-        return 1
-      }
-
-      if (cp >= 0xff_61 && cp <= 0xff_9f) {
-        u8[i] = 0x8e
-        u8[i + 1] = cp - 0xfe_c0
-        return 2
-      }
-
-      let p = map[cp]
-      if (!p) return err(cp)
-      p--
-      u8[i] = ((p / 94) | 0) + 0xa1
-      u8[i + 1] = (p % 94) + 0xa1
-      return 2
-    }
-
-    return { encode, ascii: 0x80 }
-  },
-  shift_jis: (err) => {
-    const map = getMap('shift_jis', 'jis0208')
-    const encode = (u8, i, cp) => {
-      if (cp === 0xa5) {
-        u8[i] = 0x5c
-        return 1
-      }
-
-      if (cp === 0x20_3e) {
-        u8[i] = 0x7e
-        return 1
-      }
-
-      if (cp >= 0xff_61 && cp <= 0xff_9f) {
-        u8[i] = cp - 0xfe_c0
-        return 1
-      }
-
-      let p = map[cp]
-      if (!p) return err(cp)
-      p--
-      const l = (p / 188) | 0
-      const t = p % 188
-      u8[i] = (l < 0x1f ? 0x81 : 0xc1) + l
-      u8[i + 1] = (t < 0x3f ? 0x40 : 0x41) + t
-      return 2
-    }
-
-    return { encode, ascii: 0x81 }
-  },
-  gbk: (err) => {
-    const map = getMap('gb18030')
-
-    const encode = (u8, i, cp) => {
-      if (cp === 0xe5_e5) return err(cp)
-      if (cp === 0x20_ac) {
-        u8[i] = 0x80
-        return 1
-      }
-
-      if (cp >= 0xe7_8d && cp <= 0xe8_64) {
-        if (cp <= 0xe7_93) {
-          u8[i] = 0xa6
-          u8[i + 1] = cp - 0xe6_b4
-          return 2
-        }
-
-        const l = cp < 0xe8_00 ? 0xa6 : 0xfe
-        const t = (l === 0xa6 ? e7 : e8).get(cp & 0xff)
-        if (t) {
-          u8[i] = l
-          u8[i + 1] = t
-          return 2
-        }
-      }
-
-      let p = map[cp]
-      if (p) {
-        p--
-        const t = p % 190
-        u8[i] = 0x81 + ((p / 190) | 0)
-        u8[i + 1] = (t < 0x3f ? 0x40 : 0x41) + t
-        return 2
-      }
-
-      return err(cp)
-    }
-
-    return { encode, ascii: 0x80, width: 2 }
-  },
-  gb18030: (err) => {
-    const map = getMap('gb18030')
-    const gb18030r = getTable('gb18030-ranges')
-
-    const encode = (u8, i, cp) => {
-      if (cp === 0xe5_e5) return err(cp)
-      if (cp >= 0xe7_8d && cp <= 0xe8_64) {
-        if (cp <= 0xe7_93) {
-          u8[i] = 0xa6
-          u8[i + 1] = cp - 0xe6_b4
-          return 2
-        }
-
-        const l = cp < 0xe8_00 ? 0xa6 : 0xfe
-        const t = (l === 0xa6 ? e7 : e8).get(cp & 0xff)
-        if (t) {
-          u8[i] = l
-          u8[i + 1] = t
-          return 2
-        }
-      }
-
-      let p = map[cp]
-      if (p) {
-        p--
-        const t = p % 190
-        u8[i] = 0x81 + ((p / 190) | 0)
-        u8[i + 1] = (t < 0x3f ? 0x40 : 0x41) + t
-        return 2
-      }
-
-      let a = 0, b = 0 // prettier-ignore
-      for (const [c, d] of gb18030r) {
-        if (d > cp) break
-        a = c
-        b = d
-      }
-
-      let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
-      u8[i] = 0x81 + ((rp / 12_600) | 0)
-      rp %= 12_600
-      u8[i + 1] = 0x30 + ((rp / 1260) | 0)
-      rp %= 1260
-      u8[i + 2] = 0x81 + ((rp / 10) | 0)
-      u8[i + 3] = 0x30 + (rp % 10)
-      return 4
-    }
-
-    return { encode, ascii: 0x80, width: 4 }
-  },
-}
-
-/* eslint-enable @exodus/mutable/no-param-reassign-prop-only */
-
+const encoders = new Set(['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030'])
 const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
+let gb18030r
 
-export function multibyteEncoder(enc) {
-  if (!Object.hasOwn(encoders, enc)) throw new RangeError('Unsupported encoding')
-
-  // eslint-disable-next-line unicorn/consistent-function-scoping
-  const onErr = (code) => {
-    throw new TypeError(E_STRICT)
-  }
+export function multibyteEncoder(enc, onError) {
+  if (!encoders.has(enc)) throw new RangeError('Unsupported encoding')
+  const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
+  const width = enc === 'gb18030' ? 4 : 2
+  const map = getMap(enc, size)
+  if (enc === 'gb18030' && !gb18030r) gb18030r = getTable('gb18030-ranges')
 
-  const { encode, ascii, width = 2 } = encoders[enc](onErr)
   return (str) => {
+    if (typeof str !== 'string') throw new TypeError(E_STRING)
     if (!NON_LATIN.test(str)) {
       try {
         return encodeAscii(str, E_STRICT)
@@ -761,29 +610,108 @@ export function multibyteEncoder(enc) {
     const u8 = new Uint8Array(length * width)
     let i = 0
     while (i < length) {
-      const x0 = str.charCodeAt(i)
-      if (x0 >= 128) break
-      u8[i++] = x0
+      const x = str.charCodeAt(i)
+      if (x >= 128) break
+      u8[i++] = x
+    }
+
+    // eslint-disable-next-line unicorn/consistent-function-scoping
+    const err = (code) => {
+      if (onError) return onError(code, u8, i)
+      throw new TypeError(E_STRICT)
     }
 
-    for (let j = i; j < length; j++) {
-      const x0 = str.charCodeAt(j)
-      if (x0 < ascii) {
-        u8[i++] = x0
-      } else if (x0 >= 0xd8_00 && x0 < 0xe0_00) {
-        if (x0 >= 0xdc_00 || j + 1 === length) {
-          onErr(x0) // Lone surrogate, TODO: how to handle this in non-strict?
+    if (!map || map.length < size) throw new Error('Unreachable') // Important for perf
+    if (enc === 'gb18030') {
+      // Deduping this branch with the generic one below hurts the other encoders' perf
+      const encode = (cp) => {
+        let a = 0, b = 0 // prettier-ignore
+        for (const [c, d] of gb18030r) {
+          if (d > cp) break
+          a = c
+          b = d
+        }
+
+        let rp = cp === 0xe7_c7 ? 7457 : a + cp - b
+        u8[i++] = 0x81 + ((rp / 12_600) | 0)
+        rp %= 12_600
+        u8[i++] = 0x30 + ((rp / 1260) | 0)
+        rp %= 1260
+        u8[i++] = 0x81 + ((rp / 10) | 0)
+        u8[i++] = 0x30 + (rp % 10)
+      }
+
+      for (let j = i; j < length; j++) {
+        const x = str.charCodeAt(j)
+        if (x >= 0xd8_00 && x < 0xe0_00) {
+          if (x >= 0xdc_00 || j + 1 === length) {
+            i += err(x) // lone surrogate: unpaired low, or high at end of input
+          } else {
+            const x1 = str.charCodeAt(j + 1)
+            if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
+              i += err(x) // lone high surrogate: not followed by a low surrogate
+            } else {
+              j++ // consume x1
+              encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
+            }
+          }
         } else {
-          const x1 = str.charCodeAt(j + 1)
-          if (x1 < 0xdc_00 || x1 > 0xe0_00) {
-            onErr(x0) // Lone surrogate, TODO: how to handle this in non-strict?
+          const e = map[x]
+          if (e & 0xff_00) {
+            u8[i++] = e >> 8
+            u8[i++] = e & 0xff
+          } else if (e || x === 0) {
+            u8[i++] = e
+          } else if (x === 0xe5_e5) {
+            i += err(x)
           } else {
-            j++ // consume x1
-            i += encode(u8, i, 0x1_00_00 + ((x1 & 0x3_ff) | ((x0 & 0x3_ff) << 10)))
+            encode(x)
+          }
+        }
+      }
+    } else {
+      const long =
+        enc === 'big5'
+          ? (x) => {
+              const e = map[x]
+              if (e & 0xff_00) {
+                u8[i++] = e >> 8
+                u8[i++] = e & 0xff
+              } else if (e || x === 0) {
+                u8[i++] = e
+              } else {
+                i += err(x)
+              }
+            }
+          : (x) => {
+              i += err(x)
+            }
+
+      for (let j = i; j < length; j++) {
+        const x = str.charCodeAt(j)
+        if (x >= 0xd8_00 && x < 0xe0_00) {
+          if (x >= 0xdc_00 || j + 1 === length) {
+            i += err(x) // lone surrogate: unpaired low, or high at end of input
+          } else {
+            const x1 = str.charCodeAt(j + 1)
+            if (x1 < 0xdc_00 || x1 >= 0xe0_00) {
+              i += err(x) // lone high surrogate: not followed by a low surrogate
+            } else {
+              j++ // consume x1
+              long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10)))
+            }
+          }
+        } else {
+          const e = map[x]
+          if (e & 0xff_00) {
+            u8[i++] = e >> 8
+            u8[i++] = e & 0xff
+          } else if (e || x === 0) {
+            u8[i++] = e
+          } else {
+            i += err(x)
           }
         }
-      } else {
-        i += encode(u8, i, x0)
       }
     }
 
diff --git a/multi-byte.js b/multi-byte.js
index fc485ec..c7837da 100644
--- a/multi-byte.js
+++ b/multi-byte.js
@@ -1,5 +1,4 @@
 import { assertUint8 } from './assert.js'
-import { E_STRING } from './fallback/_utils.js'
 import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
 
 export function createMultibyteDecoder(encoding, loose = false) {
@@ -16,9 +15,5 @@ export function createMultibyteDecoder(encoding, loose = false) {
 export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) {
   // TODO: replacement, truncate (replacement will need varying length)
   if (mode !== 'fatal') throw new Error('Unsupported mode')
-  const jsEncoder = multibyteEncoder(encoding) // asserts
-  return (s) => {
-    if (typeof s !== 'string') throw new TypeError(E_STRING)
-    return jsEncoder(s)
-  }
+  return multibyteEncoder(encoding) // validates encoding; returned encoder validates input type
 }
diff --git a/multi-byte.node.js b/multi-byte.node.js
index f6e2bd5..c8a6e1b 100644
--- a/multi-byte.node.js
+++ b/multi-byte.node.js
@@ -1,5 +1,5 @@
 import { assertUint8 } from './assert.js'
-import { isDeno, toBuf, E_STRING } from './fallback/_utils.js'
+import { isDeno, toBuf } from './fallback/_utils.js'
 import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
 import { isAscii } from 'node:buffer'
 
@@ -25,9 +25,5 @@ export function createMultibyteDecoder(encoding, loose = false) {
 export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) {
   // TODO: replacement, truncate (replacement will need varying length)
   if (mode !== 'fatal') throw new Error('Unsupported mode')
-  const jsEncoder = multibyteEncoder(encoding) // asserts
-  return (s) => {
-    if (typeof s !== 'string') throw new TypeError(E_STRING)
-    return jsEncoder(s)
-  }
+  return multibyteEncoder(encoding) // validates encoding; returned encoder validates input type
 }

From 3e803c1c92f24db53f26739d215ce49dd0043f8f Mon Sep 17 00:00:00 2001
From: Nikita Skovoroda <chalkerx@gmail.com>
Date: Sun, 11 Jan 2026 13:31:02 +0400
Subject: [PATCH 3/3] test: limit round-trip tests on slow engines

---
 tests/multi-byte.encode.test.js | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/multi-byte.encode.test.js b/tests/multi-byte.encode.test.js
index 9b930dc..ea9627f 100644
--- a/tests/multi-byte.encode.test.js
+++ b/tests/multi-byte.encode.test.js
@@ -227,8 +227,12 @@ describe('roundtrip, tables', () => {
   }
 })
 
+const slowEngine =
+  process.env.EXODUS_TEST_PLATFORM === 'quickjs' ||
+  process.env.EXODUS_TEST_PLATFORM === 'xs' ||
+  process.env.EXODUS_TEST_PLATFORM === 'engine262'
 describe('roundtrip, full Unicode', () => {
-  const MAX = 0x10_ff_ff // Max Unicode codepoint
+  const MAX = slowEngine ? 0x1_ff_ff : 0x10_ff_ff // Upper codepoint bound: full Unicode range, reduced on slow engines
 
   test('gb18030', { timeout: 60_000 }, (t) => {
     const enc = createMultibyteEncoder('gb18030')