From 2b2e7b41cc08f77ee448572d927e0cd59ce78520 Mon Sep 17 00:00:00 2001
From: Marco Ribeiro <speaktomarco@gmail.com>
Date: Thu, 11 Aug 2022 19:36:53 -0300
Subject: [PATCH 1/3] Add unidecode_translate method

---
 unidecode/__init__.py | 86 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 84 insertions(+), 2 deletions(-)

diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 7ac3675..8c15a7e 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -17,9 +17,11 @@
 b'Knosos'
 """
 import warnings
-from typing import Dict, Optional, Sequence
+from typing import Dict, Iterator, Optional, Sequence
+from pathlib import Path
 
-Cache: Dict[int, Optional[Sequence[Optional[str]]]] = {}
+Cache = {} # type: Dict[int, Optional[Sequence[Optional[str]]]]
+Translator: Optional[dict[str, int]] = None
 
 class UnidecodeError(ValueError):
     def __init__(self, message: str, index: Optional[int] = None) -> None:
@@ -136,3 +138,83 @@ def _unidecode(string: str, errors: str, replace_str:str) -> str:
         retval.append(repl)
 
     return ''.join(retval)
+
+def preload_translator() -> dict[str, int]:
+    global Translator
+
+    if Translator is None:
+        Translator = {
+            codepoint : char
+            # Built once and cached in the module-level Translator; keys are int code points.
+            for file in Path(__file__).parent.glob('x*.py')  # x*.py files next to this module
+            for codepoint, char in enumerate(
+                __import__(f'unidecode.{file.stem}', globals(), locals(), ['data']).data,
+                int(f'0{file.stem}', base=16) << 8  # stem parsed as hex ('0' + 'x...'), times 256
+            )
+            if codepoint > 127 and isinstance(char, str)  # skip ASCII and non-str entries
+        }
+        # Later calls skip the block above and return the cached dict.
+    return Translator
+
+def _unidecode_translate_replace_iterator(string: str, replace_str: str) -> Iterator[int]:
+    """Yield byte values for *string*, one ASCII character at a time.
+
+    Each character above code point 127 yields the encoded bytes of *replace_str*.
+    """
+    substitute = replace_str.encode()
+    for codepoint in map(ord, string):
+        if codepoint <= 127:
+            yield codepoint
+        else:
+            yield from substitute
+
+def unidecode_translate(
+    string: str, errors: str = 'ignore', replace_str: str = '?', check_surrogates: bool = False
+) -> str:
+    """Transliterate a Unicode string into an ASCII string using str.translate.
+
+    Usually faster than unidecode_expect_nonascii/unidecode, but uses more
+    memory. Call preload_translator() beforehand to keep the cost of building
+    the translation table out of the first call.
+
+    errors: 'ignore' drops characters without a replacement, 'strict' raises
+    UnidecodeError, 'replace' substitutes replace_str, 'preserve' keeps them.
+    check_surrogates: if true, warn (RuntimeWarning) about surrogates in string.
+
+    >>> unidecode_translate("\u5317\u4EB0")
+    'Bei Jing '
+
+    See unidecode_expect_nonascii.
+    """
+    if check_surrogates:
+        for char in string:
+            if 0xd800 <= ord(char) <= 0xdfff:
+                warnings.warn(
+                    f'Surrogate character {char} will be ignored. '
+                    'You might be using a narrow Python build.',
+                    RuntimeWarning, 2
+                )
+
+    table = preload_translator()
+    retval = string.translate(table)
+
+    if errors == 'preserve':
+        return retval
+    if errors not in ('ignore', 'strict', 'replace'):
+        raise UnidecodeError(f'invalid value for errors parameter {errors}')
+    if errors == 'replace' and replace_str != '?':
+        return bytes(_unidecode_translate_replace_iterator(retval, replace_str)).decode()
+
+    try:
+        return retval.encode('ascii', errors=errors).decode()
+    except UnicodeEncodeError as exc:
+        # exc.start is an offset into the translated text; report the first
+        # untranslatable input character instead (assumes table values are ASCII).
+        index = next(
+            (i for i, c in enumerate(string) if ord(c) > 127 and ord(c) not in table),
+            exc.start
+        )
+        raise UnidecodeError(
+            f'no replacement found for character {exc.object[exc.start : exc.end]} '
+            f'in position {index}', index
+        ) from None

From 2a945f1d5394414393024f1f94242d1c150f9a2f Mon Sep 17 00:00:00 2001
From: Marco Ribeiro <speaktomarco@gmail.com>
Date: Thu, 11 Aug 2022 19:37:00 -0300
Subject: [PATCH 2/3] Fix print calls for Python 3

---
 benchmark.py | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/benchmark.py b/benchmark.py
index 7aaf6c4..4dc9537 100644
--- a/benchmark.py
+++ b/benchmark.py
@@ -1,26 +1,27 @@
 # -*- coding: utf-8 -*-
+from __future__ import print_function
 import timeit
 
 def main():
-	print "unidecode_expect_ascii, ASCII string"
+	print("unidecode_expect_ascii, ASCII string")
 	timeit.main([
 		'-s',
 		'from unidecode import unidecode_expect_ascii',
 		'unidecode_expect_ascii(u"Hello, World")'])
 
-	print "unidecode_expect_ascii, non-ASCII string"
+	print("unidecode_expect_ascii, non-ASCII string")
 	timeit.main([
 		'-s',
 		'from unidecode import unidecode_expect_ascii',
 		'unidecode_expect_ascii(u"¡Hola mundo!")'])
 
-	print "unidecode_expect_nonascii, ASCII string"
+	print("unidecode_expect_nonascii, ASCII string")
 	timeit.main([
 		'-s',
 		'from unidecode import unidecode_expect_nonascii',
 		'unidecode_expect_nonascii(u"Hello, World")'])
 
-	print "unidecode_expect_nonascii, non-ASCII string"
+	print("unidecode_expect_nonascii, non-ASCII string")
 	timeit.main([
 		'-s',
 		'from unidecode import unidecode_expect_nonascii',

From b43eb0ad8fb732d6a1bdc68a999989d58049ccc9 Mon Sep 17 00:00:00 2001
From: Marco Ribeiro <speaktomarco@gmail.com>
Date: Fri, 12 Aug 2022 10:36:56 -0300
Subject: [PATCH 3/3] Fix Translator type hint

---
 unidecode/__init__.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/unidecode/__init__.py b/unidecode/__init__.py
index 8c15a7e..08544e9 100644
--- a/unidecode/__init__.py
+++ b/unidecode/__init__.py
@@ -21,7 +21,7 @@
 from pathlib import Path
 
 Cache = {} # type: Dict[int, Optional[Sequence[Optional[str]]]]
-Translator: Optional[dict[str, int]] = None
+Translator = None # type: Optional[Dict[int, str]]
 
 class UnidecodeError(ValueError):
     def __init__(self, message: str, index: Optional[int] = None) -> None:
@@ -139,7 +139,7 @@ def _unidecode(string: str, errors: str, replace_str:str) -> str:
 
     return ''.join(retval)
 
-def preload_translator() -> dict[str, int]:
+def preload_translator() -> Dict[int, str]:
     global Translator
 
     if Translator is None: