diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py')
-rw-r--r-- | .venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.py | 29 |
1 files changed, 29 insertions, 0 deletions
# tokenizers/normalizers/__init__.py
#
# Binds each normalizer class found on the `normalizers` object of the parent
# package to a module-level name, and provides a helper that builds a unicode
# normalizer from its short string name.
from .. import normalizers


# Module-level aliases for the classes exposed by the parent package.
Normalizer = normalizers.Normalizer
BertNormalizer = normalizers.BertNormalizer
NFD = normalizers.NFD
NFKD = normalizers.NFKD
NFC = normalizers.NFC
NFKC = normalizers.NFKC
Sequence = normalizers.Sequence
Lowercase = normalizers.Lowercase
Prepend = normalizers.Prepend
Strip = normalizers.Strip
StripAccents = normalizers.StripAccents
Nmt = normalizers.Nmt
Precompiled = normalizers.Precompiled
Replace = normalizers.Replace


# Lower-case form name -> normalizer class. Key order is kept as-is because
# the keys view is interpolated into the error message below.
NORMALIZERS = {
    "nfc": NFC,
    "nfd": NFD,
    "nfkc": NFKC,
    "nfkd": NFKD,
}


def unicode_normalizer_from_str(normalizer: str) -> Normalizer:
    """Instantiate the unicode normalizer registered under ``normalizer``.

    Args:
        normalizer: One of the keys of ``NORMALIZERS`` ("nfc", "nfd",
            "nfkc" or "nfkd"). The lookup is an exact, case-sensitive match.

    Returns:
        A new instance of the matching class, constructed with no arguments.

    Raises:
        ValueError: If ``normalizer`` is not a key of ``NORMALIZERS``.
    """
    normalizer_cls = NORMALIZERS.get(normalizer)
    if normalizer_cls is None:
        message = "{} is not a known unicode normalizer. Available are {}".format(
            normalizer, NORMALIZERS.keys()
        )
        raise ValueError(message)

    return normalizer_cls()