Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
67 changes: 67 additions & 0 deletions t/transformers/transformers_4.52.4.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
diff --git a/src/transformers/models/marian/tokenization_marian.py b/src/transformers/models/marian/tokenization_marian.py
index bf9e0a8a2a..361b81d3db 100644
--- a/src/transformers/models/marian/tokenization_marian.py
+++ b/src/transformers/models/marian/tokenization_marian.py
@@ -13,7 +13,6 @@
# limitations under the License.
import json
import os
-import re
import warnings
from pathlib import Path
from shutil import copyfile
@@ -104,7 +103,6 @@ class MarianTokenizer(PreTrainedTokenizer):

vocab_files_names = VOCAB_FILES_NAMES
model_input_names = ["input_ids", "attention_mask"]
- language_code_re = re.compile(">>.+<<") # type: re.Pattern

def __init__(
self,
@@ -186,10 +184,12 @@ class MarianTokenizer(PreTrainedTokenizer):

def remove_language_code(self, text: str):
"""Remove language codes like >>fr<< before sentencepiece"""
- match = self.language_code_re.match(text)
- code: list = [match.group(0)] if match else []
- return code, self.language_code_re.sub("", text)
-
+ code = []
+ if text.startswith(">>") and (end_loc := text.find("<<")) != -1:
+ code.append(text[: end_loc + 2])
+ text = text[end_loc + 2 :]
+ return code, text
+
def _tokenize(self, text: str) -> List[str]:
code, text = self.remove_language_code(text)
pieces = self.current_spm.encode(text, out_type=str)
diff --git a/src/transformers/optimization_tf.py b/src/transformers/optimization_tf.py
index 4da4ecc901..3222c685d9 100644
--- a/src/transformers/optimization_tf.py
+++ b/src/transformers/optimization_tf.py
@@ -14,7 +14,6 @@
# ==============================================================================
"""Functions and classes related to optimization (weight updates)."""

-import re
from typing import Callable, Optional, Union

import tensorflow as tf
@@ -296,12 +295,12 @@ class AdamWeightDecay(Adam):

if self._include_in_weight_decay:
for r in self._include_in_weight_decay:
- if re.search(r, param_name) is not None:
+ if r in param_name:
return True

if self._exclude_from_weight_decay:
for r in self._exclude_from_weight_decay:
- if re.search(r, param_name) is not None:
+ if r in param_name:
return False
return True