aboutsummaryrefslogtreecommitdiff
path: root/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.pyi
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.pyi')
-rw-r--r--.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.pyi595
1 files changed, 595 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.pyi b/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.pyi
new file mode 100644
index 00000000..507d4473
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/tokenizers/normalizers/__init__.pyi
@@ -0,0 +1,595 @@
+# Generated content DO NOT EDIT
+class Normalizer:
+ """
+ Base class for all normalizers
+
+ This class is not supposed to be instantiated directly. Instead, any implementation of a
+ Normalizer will return an instance of this class when instantiated.
+ """
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class BertNormalizer(Normalizer):
+ """
+ BertNormalizer
+
+ Takes care of normalizing raw text before giving it to a Bert model.
+ This includes cleaning the text, handling accents, chinese chars and lowercasing
+
+ Args:
+ clean_text (:obj:`bool`, `optional`, defaults to :obj:`True`):
+ Whether to clean the text, by removing any control characters
+ and replacing all whitespaces by the classic one.
+
+ handle_chinese_chars (:obj:`bool`, `optional`, defaults to :obj:`True`):
+ Whether to handle chinese chars by putting spaces around them.
+
+ strip_accents (:obj:`bool`, `optional`):
+ Whether to strip all accents. If this option is not specified (ie == None),
+ then it will be determined by the value for `lowercase` (as in the original Bert).
+
+ lowercase (:obj:`bool`, `optional`, defaults to :obj:`True`):
+ Whether to lowercase.
+ """
+ def __init__(self, clean_text=True, handle_chinese_chars=True, strip_accents=None, lowercase=True):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Lowercase(Normalizer):
+ """
+ Lowercase Normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class NFC(Normalizer):
+ """
+ NFC Unicode Normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class NFD(Normalizer):
+ """
+ NFD Unicode Normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class NFKC(Normalizer):
+ """
+ NFKC Unicode Normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class NFKD(Normalizer):
+ """
+ NFKD Unicode Normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Nmt(Normalizer):
+ """
+ Nmt normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Precompiled(Normalizer):
+ """
+ Precompiled normalizer
+ Don't use manually it is used for compatiblity for SentencePiece.
+ """
+ def __init__(self, precompiled_charsmap):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Prepend(Normalizer):
+ """
+ Prepend normalizer
+ """
+ def __init__(self, prepend):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Replace(Normalizer):
+ """
+ Replace normalizer
+ """
+ def __init__(self, pattern, content):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Sequence(Normalizer):
+ """
+ Allows concatenating multiple other Normalizer as a Sequence.
+ All the normalizers run in sequence in the given order
+
+ Args:
+ normalizers (:obj:`List[Normalizer]`):
+ A list of Normalizer to be run as a sequence
+ """
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class Strip(Normalizer):
+ """
+ Strip normalizer
+ """
+ def __init__(self, left=True, right=True):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass
+
+class StripAccents(Normalizer):
+ """
+ StripAccents normalizer
+ """
+ def __init__(self):
+ pass
+
+ def normalize(self, normalized):
+ """
+ Normalize a :class:`~tokenizers.NormalizedString` in-place
+
+ This method allows to modify a :class:`~tokenizers.NormalizedString` to
+ keep track of the alignment information. If you just want to see the result
+ of the normalization on a raw string, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize_str`
+
+ Args:
+ normalized (:class:`~tokenizers.NormalizedString`):
+ The normalized string on which to apply this
+ :class:`~tokenizers.normalizers.Normalizer`
+ """
+ pass
+
+ def normalize_str(self, sequence):
+ """
+ Normalize the given string
+
+ This method provides a way to visualize the effect of a
+ :class:`~tokenizers.normalizers.Normalizer` but it does not keep track of the alignment
+ information. If you need to get/convert offsets, you can use
+ :meth:`~tokenizers.normalizers.Normalizer.normalize`
+
+ Args:
+ sequence (:obj:`str`):
+ A string to normalize
+
+ Returns:
+ :obj:`str`: A string after normalization
+ """
+ pass