aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py
diff options
context:
space:
mode:
author: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell, 2025-03-28 21:52:21 -0500
commit: 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree: ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py
parent: cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download: gn-ai-master.tar.gz
two version of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py')
-rw-r--r-- .venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py 29
1 files changed, 29 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py b/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py
new file mode 100644
index 00000000..15a16f1e
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py
@@ -0,0 +1,29 @@
+from .. import normalizers
+
+
+Normalizer = normalizers.Normalizer  # this and each alias below re-exports the same-named attribute of the imported `normalizers` object
+BertNormalizer = normalizers.BertNormalizer
+NFD = normalizers.NFD
+NFKD = normalizers.NFKD
+NFC = normalizers.NFC
+NFKC = normalizers.NFKC
+Sequence = normalizers.Sequence  # NOTE: this module-level name is `normalizers.Sequence`, not `typing.Sequence`
+Lowercase = normalizers.Lowercase
+Prepend = normalizers.Prepend
+Strip = normalizers.Strip
+StripAccents = normalizers.StripAccents
+Nmt = normalizers.Nmt
+Precompiled = normalizers.Precompiled
+Replace = normalizers.Replace
+
+
+NORMALIZERS = {"nfc": NFC, "nfd": NFD, "nfkc": NFKC, "nfkd": NFKD}  # lowercase name -> alias above; unicode_normalizer_from_str looks a name up here and calls the entry with no arguments
+
+
+def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
+    """Build the Unicode normalizer registered under ``normalizer``.
+
+    Args:
+        normalizer: A key of ``NORMALIZERS`` ("nfc", "nfd", "nfkc" or "nfkd").
+            The lookup is exact; no case folding or stripping is applied.
+
+    Returns:
+        The object produced by calling the registered entry with no arguments.
+
+    Raises:
+        ValueError: If ``normalizer`` is not a key of ``NORMALIZERS``.
+    """
+    if normalizer not in NORMALIZERS:
+        # Format the keys as a plain list: ``NORMALIZERS.keys()`` would leak the
+        # ``dict_keys([...])`` wrapper into the user-facing message.
+        raise ValueError(
+            "{} is not a known unicode normalizer. Available are {}".format(normalizer, list(NORMALIZERS))
+        )
+
+    return NORMALIZERS[normalizer]()