From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../tokenizers/normalizers/__init__.py | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py (limited to '.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py') diff --git a/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py b/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py new file mode 100644 index 00000000..15a16f1e --- /dev/null +++ b/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py @@ -0,0 +1,29 @@ +from .. import normalizers + + +Normalizer = normalizers.Normalizer +BertNormalizer = normalizers.BertNormalizer +NFD = normalizers.NFD +NFKD = normalizers.NFKD +NFC = normalizers.NFC +NFKC = normalizers.NFKC +Sequence = normalizers.Sequence +Lowercase = normalizers.Lowercase +Prepend = normalizers.Prepend +Strip = normalizers.Strip +StripAccents = normalizers.StripAccents +Nmt = normalizers.Nmt +Precompiled = normalizers.Precompiled +Replace = normalizers.Replace + + +NORMALIZERS = {"nfc": NFC, "nfd": NFD, "nfkc": NFKC, "nfkd": NFKD} + + +def unicode_normalizer_from_str(normalizer: str) -> Normalizer: + if normalizer not in NORMALIZERS: + raise ValueError( + "{} is not a known unicode normalizer. Available are {}".format(normalizer, NORMALIZERS.keys()) + ) + + return NORMALIZERS[normalizer]() -- cgit v1.2.3