ExodusOSS · ChALkeR · Jan 11, 2026 · Jan 10, 2026 · Jan 11, 2026 · Jan 11, 2026
@@ -1,9 +1,10 @@
-import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2 } from './latin1.js'
+import { E_STRING } from './_utils.js'
+import { asciiPrefix, decodeAscii, decodeLatin1, decodeUCS2, encodeAscii } from './latin1.js'
 import { getTable } from './multi-byte.table.js'
 
 export const E_STRICT = 'Input is not well-formed for this encoding'
 
-// TODO: optimize
+/* Decoders */
 
 // If the decoder is not cleared properly, state can be preserved between non-streaming calls!
 // See comment about fatal stream
@@ -504,3 +505,216 @@ export function multibyteDecoder(enc, loose = false) {
     return res + mapper.decode(arr, res.length, arr.length, stream)
   }
 }
+
+/* Encoders */
+
+const maps = new Map() // cache: encoding id -> Uint16Array built by getMap
+const e7 = [[148, 236], [149, 237], [150, 243]] // prettier-ignore
+const e8 = [[30, 89], [38, 97], [43, 102], [44, 103], [50, 109], [67, 126], [84, 144], [100, 160]] // prettier-ignore
+const preencoders = { // per encoding: table index p -> (first byte << 8) | second byte
+  __proto__: null, // no inherited keys, so looking up an unknown id yields undefined
+  big5: (p) => ((((p / 157) | 0) + 0x81) << 8) | ((p % 157 < 0x3f ? 0x40 : 0x62) + (p % 157)),
+  shift_jis: (p) => {
+    const l = (p / 188) | 0
+    const t = p % 188
+    return ((l + (l < 0x1f ? 0x81 : 0xc1)) << 8) | ((t < 0x3f ? 0x40 : 0x41) + t)
+  },
+  'euc-jp': (p) => ((((p / 94) | 0) + 0xa1) << 8) | ((p % 94) + 0xa1),
+  'euc-kr': (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190) + 0x41),
+  gb18030: (p) => ((((p / 190) | 0) + 0x81) << 8) | ((p % 190 < 0x3f ? 0x40 : 0x41) + (p % 190)),
+}
+// gbk produces the same two-byte values as gb18030, so it shares the function
+preencoders.gbk = preencoders.gb18030
+
+// Builds, and caches per id, a Uint16Array mapping code point -> bytes packed as (b0 << 8) | b1; 0 = unmapped (except U+0000).
+// Memory is deliberately traded for perf: most maps are 128 KiB, big5 is 380 KiB, lazy-loaded at first use
+function getMap(id, size) {
+  const cached = maps.get(id)
+  if (cached) return cached
+  let tname = id
+  const sjis = id === 'shift_jis'
+  if (id === 'gbk') tname = 'gb18030' // gbk is built from the gb18030 table
+  if (id === 'euc-jp' || sjis) tname = 'jis0208' // both are built from the jis0208 table
+  const table = getTable(tname)
+  const map = new Uint16Array(size)
+  const enc = preencoders[id] || ((p) => p + 1)
+  for (let i = 0; i < table.length; i++) {
+    const c = table[i]
+    if (c === REP || c === undefined) continue // empty slot; REP is declared outside the lines shown here
+    if (id === 'big5') {
+      if (i < 5024) continue // 5024 = (0xA1 - 0x81) * 157; this also skips multi-codepoint strings
+      // In big5 the first index wins for a duplicated code point, except for these six (last index wins)
+      if (
+        map[c] && // NOTE(review): not an indexed read when c is a string — confirm duplicates are still caught
+        c !== 0x25_50 &&
+        c !== 0x25_5e &&
+        c !== 0x25_61 &&
+        c !== 0x25_6a &&
+        c !== 0x53_41 &&
+        c !== 0x53_45
+      ) {
+        continue
+      }
+    } else {
+      if (sjis && i >= 8272 && i <= 8835) continue // shift_jis never encodes to indices 8272..8835
+      if (map[c]) continue // keep the first index for a duplicated code point
+    }
+
+    if (typeof c === 'string') {
+      // always a single codepoint here
+      map[c.codePointAt(0)] = enc(i)
+    } else if (c !== REP) { // always true here: REP entries were skipped at the top of the loop
+      map[c] = enc(i)
+    }
+  }
+
+  for (let i = 0; i < 0x80; i++) map[i] = i // ASCII always encodes to itself, overriding any table entry
+  if (sjis || id === 'euc-jp') {
+    if (sjis) map[0x80] = 0x80 // shift_jis only: U+0080 -> byte 0x80
+    const d = sjis ? 0xfe_c0 : 0x70_c0 // offset so that U+FF61 -> 0xA1 (shift_jis) or 0x8EA1 (euc-jp)
+    for (let i = 0xff_61; i <= 0xff_9f; i++) map[i] = i - d // U+FF61..U+FF9F
+    map[0x22_12] = map[0xff_0d] // U+2212 encodes the same as U+FF0D
+    map[0xa5] = 0x5c // U+00A5 -> byte 0x5C
+    map[0x20_3e] = 0x7e // U+203E -> byte 0x7E
+  } else if (tname === 'gb18030') {
+    if (id === 'gbk') map[0x20_ac] = 0x80 // gbk only: U+20AC -> byte 0x80
+    for (let i = 0xe7_8d; i <= 0xe7_93; i++) map[i] = i - 0x40_b4 // U+E78D..U+E793 -> 0xA6D9..0xA6DF
+    for (const [a, b] of e7) map[0xe7_00 | a] = 0xa6_00 | b // e7 pairs: [low byte of U+E7xx, second byte after 0xA6]
+    for (const [a, b] of e8) map[0xe8_00 | a] = 0xfe_00 | b // e8 pairs: [low byte of U+E8xx, second byte after 0xFE]
+  }
+
+  maps.set(id, map)
+  return map
+}
+
+const encoders = new Set(['big5', 'euc-kr', 'euc-jp', 'shift_jis', 'gbk', 'gb18030']) // encodings multibyteEncoder accepts
+const NON_LATIN = /[^\x00-\xFF]/ // eslint-disable-line no-control-regex
+let gb18030r // 'gb18030-ranges' table, set lazily in multibyteEncoder
+// Returns (str) => Uint8Array for enc; onError(code, u8, i), when given, handles unencodable input instead of a throw
+export function multibyteEncoder(enc, onError) {
+  if (!encoders.has(enc)) throw new RangeError('Unsupported encoding')
+  const size = enc === 'big5' ? 0x2_f8_a7 : 0x1_00_00 // for big5, max codepoint in table + 1
+  const width = enc === 'gb18030' ? 4 : 2 // max output bytes per UTF-16 code unit
+  const map = getMap(enc, size)
+  if (enc === 'gb18030' && !gb18030r) gb18030r = getTable('gb18030-ranges') // loaded once, on first gb18030 encoder
+
+  return (str) => {
+    if (typeof str !== 'string') throw new TypeError(E_STRING)
+    if (!NON_LATIN.test(str)) { // no code unit above U+00FF: try the ASCII fast path
+      try {
+        return encodeAscii(str, E_STRICT)
+      } catch {} // presumably encodeAscii throws on non-ASCII input; fall through to the general path
+    }
+
+    const length = str.length
+    const u8 = new Uint8Array(length * width) // worst-case size, trimmed on return
+    let i = 0
+    while (i < length) { // copy the leading ASCII run verbatim
+      const x = str.charCodeAt(i)
+      if (x >= 128) break
+      u8[i++] = x
+    }
+
+    // eslint-disable-next-line unicorn/consistent-function-scoping
+    const err = (code) => {
+      if (onError) return onError(code, u8, i) // every call site adds the returned value to i
+      throw new TypeError(E_STRICT)
+    }
+
+    if (!map || map.length < size) throw new Error('Unreachable') // Important for perf
+    if (enc === 'gb18030') {
+      // Deduping this branch hurts other encoders perf
+      const encode = (cp) => { // writes cp as a four-byte sequence derived from the ranges table
+        let a = 0, b = 0 // prettier-ignore
+        for (const [c, d] of gb18030r) { // assumes entries are [pointer, code point] in ascending order — TODO confirm
+          if (d > cp) break
+          a = c
+          b = d
+        }
+
+        let rp = cp === 0xe7_c7 ? 7457 : a + cp - b // U+E7C7 has a fixed pointer
+        u8[i++] = 0x81 + ((rp / 12_600) | 0) // 12_600 = 10 * 126 * 10
+        rp %= 12_600
+        u8[i++] = 0x30 + ((rp / 1260) | 0)
+        rp %= 1260
+        u8[i++] = 0x81 + ((rp / 10) | 0)
+        u8[i++] = 0x30 + (rp % 10)
+      }
+
+      for (let j = i; j < length; j++) { // j reads UTF-16 code units, i is the write offset into u8
+        const x = str.charCodeAt(j)
+        if (x >= 0xd8_00 && x < 0xe0_00) { // surrogate code unit
+          if (x >= 0xdc_00 || j + 1 === length) { // low surrogate first, or high surrogate at end of string
+            i += err(x) // lone
+          } else {
+            const x1 = str.charCodeAt(j + 1)
+            if (x1 < 0xdc_00 || x1 >= 0xe0_00) { // high surrogate not followed by a low one
+              i += err(x) // lone
+            } else {
+              j++ // consume x1
+              encode(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))) // combine the pair into a code point
+            }
+          }
+        } else {
+          const e = map[x]
+          if (e & 0xff_00) { // two-byte value
+            u8[i++] = e >> 8
+            u8[i++] = e & 0xff
+          } else if (e || x === 0) { // single byte; 0 in the map means unmapped unless x is U+0000
+            u8[i++] = e
+          } else if (x === 0xe5_e5) { // U+E5E5 is rejected instead of getting a four-byte form
+            i += err(x)
+          } else {
+            encode(x) // not in the two-byte map: use the four-byte form
+          }
+        }
+      }
+    } else {
+      const long = // handler for code points above U+FFFF (from surrogate pairs)
+        enc === 'big5'
+          ? (x) => { // big5: the map extends past U+FFFF (size 0x2_f8_a7), so look the code point up
+              const e = map[x]
+              if (e & 0xff_00) {
+                u8[i++] = e >> 8
+                u8[i++] = e & 0xff
+              } else if (e || x === 0) {
+                u8[i++] = e
+              } else {
+                i += err(x)
+              }
+            }
+          : (x) => { // other encodings: 0x1_00_00-entry map, nothing above U+FFFF is encodable
+              i += err(x)
+            }
+
+      for (let j = i; j < length; j++) { // j reads UTF-16 code units, i is the write offset into u8
+        const x = str.charCodeAt(j)
+        if (x >= 0xd8_00 && x < 0xe0_00) { // surrogate code unit
+          if (x >= 0xdc_00 || j + 1 === length) { // low surrogate first, or high surrogate at end of string
+            i += err(x) // lone
+          } else {
+            const x1 = str.charCodeAt(j + 1)
+            if (x1 < 0xdc_00 || x1 >= 0xe0_00) { // high surrogate not followed by a low one
+              i += err(x) // lone
+            } else {
+              j++ // consume x1
+              long(0x1_00_00 + ((x1 & 0x3_ff) | ((x & 0x3_ff) << 10))) // combine the pair into a code point
+            }
+          }
+        } else {
+          const e = map[x]
+          if (e & 0xff_00) { // two-byte value
+            u8[i++] = e >> 8
+            u8[i++] = e & 0xff
+          } else if (e || x === 0) { // single byte; 0 in the map means unmapped unless x is U+0000
+            u8[i++] = e
+          } else {
+            i += err(x)
+          }
+        }
+      }
+    }
+
+    return i === u8.length ? u8 : u8.subarray(0, i) // trim to the bytes actually written
+  }
+}
@@ -104,6 +104,7 @@ export function getTable(id) {
     res = new Array(sizes[id]) // array of strings or undefined
     unwrap(res, indices[id], 0, true)
     // Pointer code updates are embedded into the table
+    // These are skipped in encoder as encoder uses only pointers >= (0xA1 - 0x81) * 157
     res[1133] = '\xCA\u0304'
     res[1135] = '\xCA\u030C'
     res[1164] = '\xEA\u0304'

@@ -1,5 +1,5 @@
 import { assertUint8 } from './assert.js'
-import { multibyteDecoder } from './fallback/multi-byte.js'
+import { multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
 
 export function createMultibyteDecoder(encoding, loose = false) {
   const jsDecoder = multibyteDecoder(encoding, loose) // asserts
@@ -11,3 +11,9 @@ export function createMultibyteDecoder(encoding, loose = false) {
     return jsDecoder(arr, stream)
   }
 }
+
+export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) { // wrapper over multibyteEncoder; only 'fatal' mode so far
+  // TODO: replacement, truncate (replacement will need varying length)
+  if (mode !== 'fatal') throw new Error('Unsupported mode')
+  return multibyteEncoder(encoding) // asserts the encoding; no onError is passed, so unencodable input throws
+}
@@ -1,6 +1,6 @@
 import { assertUint8 } from './assert.js'
 import { isDeno, toBuf } from './fallback/_utils.js'
-import { isAsciiSuperset, multibyteDecoder } from './fallback/multi-byte.js'
+import { isAsciiSuperset, multibyteDecoder, multibyteEncoder } from './fallback/multi-byte.js'
 import { isAscii } from 'node:buffer'
 
 export function createMultibyteDecoder(encoding, loose = false) {
@@ -21,3 +21,9 @@ export function createMultibyteDecoder(encoding, loose = false) {
     return jsDecoder(arr, stream)
   }
 }
+
+export function createMultibyteEncoder(encoding, { mode = 'fatal' } = {}) { // wrapper over multibyteEncoder; only 'fatal' mode so far
+  // TODO: replacement, truncate (replacement will need varying length)
+  if (mode !== 'fatal') throw new Error('Unsupported mode')
+  return multibyteEncoder(encoding) // asserts the encoding; no onError is passed, so unencodable input throws
+}