author:    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit:    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree:      ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/tokenizers/decoders
parent:    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/tokenizers/decoders')
-rw-r--r--  .venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.py    14
-rw-r--r--  .venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.pyi  271
2 files changed, 285 insertions(+), 0 deletions(-)
diff --git a/.venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.py b/.venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.py
new file mode 100644
index 00000000..a717379c
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.py
@@ -0,0 +1,14 @@
+from .. import decoders
+
+
+Decoder = decoders.Decoder
+ByteLevel = decoders.ByteLevel
+Replace = decoders.Replace
+WordPiece = decoders.WordPiece
+ByteFallback = decoders.ByteFallback
+Fuse = decoders.Fuse
+Strip = decoders.Strip
+Metaspace = decoders.Metaspace
+BPEDecoder = decoders.BPEDecoder
+CTC = decoders.CTC
+Sequence = decoders.Sequence
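+# Illustrative note (a sketch, assuming the package is installed): these
+# re-exports make both import styles resolve to the same classes, e.g.
+#
+#     from tokenizers import decoders
+#     from tokenizers.decoders import ByteLevel
+#     assert ByteLevel is decoders.ByteLevel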
diff --git a/.venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.pyi b/.venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.pyi
new file mode 100644
index 00000000..b967fbd1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/tokenizers/decoders/__init__.pyi
@@ -0,0 +1,271 @@
+# Generated content DO NOT EDIT
+class Decoder:
+    """
+    Base class for all decoders
+
+    This class is not supposed to be instantiated directly. Instead, any implementation of
+    a Decoder will return an instance of this class when instantiated.
+    """
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
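+# Illustrative usage sketch (assuming the `tokenizers` package is installed):
+# every concrete decoder is an instance of this base class, e.g.
+#
+#     from tokenizers import decoders
+#     isinstance(decoders.WordPiece(), decoders.Decoder)   # True
+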
+class BPEDecoder(Decoder):
+    """
+    BPEDecoder Decoder
+
+    Args:
+        suffix (:obj:`str`, `optional`, defaults to :obj:`</w>`):
+            The suffix that was used to characterize an end-of-word. This suffix is
+            replaced by whitespace during decoding.
+    """
+    def __init__(self, suffix="</w>"):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
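+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# the "</w>" end-of-word suffix is turned back into spaces between words, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.BPEDecoder().decode(["hello</w>", "world</w>"])
+#     # expected roughly: "hello world"
+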
+class ByteFallback(Decoder):
+    """
+    ByteFallback Decoder
+    ByteFallback is a simple trick that converts tokens looking like `<0x61>`
+    to pure bytes and attempts to decode them into a string. If the tokens
+    cannot be decoded, you will get � instead for each inconvertible byte token.
+
+    """
+    def __init__(self):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
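+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# tokens of the form `<0xNN>` are converted back to bytes and decoded as text, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.ByteFallback().decode(["<0x61>", "<0x62>", "c"])
+#     # expected roughly: "abc"
+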
+class ByteLevel(Decoder):
+    """
+    ByteLevel Decoder
+
+    This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.ByteLevel`
+    :class:`~tokenizers.pre_tokenizers.PreTokenizer`.
+    """
+    def __init__(self):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
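+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# byte-level tokens, where `Ġ` encodes a leading space, are mapped back to text, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.ByteLevel().decode(["Hello", "Ġworld"])
+#     # expected roughly: "Hello world"
+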
+class CTC(Decoder):
+    """
+    CTC Decoder
+
+    Args:
+        pad_token (:obj:`str`, `optional`, defaults to :obj:`<pad>`):
+            The pad token used by CTC to delimit a new token.
+        word_delimiter_token (:obj:`str`, `optional`, defaults to :obj:`|`):
+            The word delimiter token. It will be replaced by a space during decoding.
+        cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            Whether to clean up some tokenization artifacts,
+            mainly spaces before punctuation and some abbreviated English forms.
+    """
+    def __init__(self, pad_token="<pad>", word_delimiter_token="|", cleanup=True):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
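+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# CTC decoding collapses repeated tokens, drops the pad token, and maps the word
+# delimiter to a space, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.CTC().decode(["<pad>", "h", "h", "e", "l", "l", "<pad>", "l", "o", "<pad>"])
+#     # expected roughly: "hello"
+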
+class Fuse(Decoder):
+    """
+    Fuse Decoder
+    Fuse simply fuses every token into a single string.
+    This is the last step of decoding; this decoder exists only for cases where
+    other decoders need to be applied *after* the fusion.
+    """
+    def __init__(self):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
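+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# Fuse concatenates the tokens into one string so later decoders can work on the
+# whole text, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.Fuse().decode(["Hel", "lo", ",", " wor", "ld"])
+#     # expected roughly: "Hello, world"
+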
+class Metaspace(Decoder):
+    """
+    Metaspace Decoder
+
+    Args:
+        replacement (:obj:`str`, `optional`, defaults to :obj:`▁`):
+            The replacement character. Must be exactly one character. By default we
+            use the `▁` (U+2581) meta symbol (same as in SentencePiece).
+
+        prepend_scheme (:obj:`str`, `optional`, defaults to :obj:`"always"`):
+            Whether to add a space to the first word if there isn't already one. This
+            lets us treat `hello` exactly like `say hello`.
+            Choices: "always", "never", "first". "first" means the space is only added on the
+            first token (relevant when special tokens or other pre_tokenizers are used).
+    """
+    def __init__(self, replacement="▁", prepend_scheme="always", split=True):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
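+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# the `▁` meta symbol becomes a space, and the space prepended to the first word
+# is removed, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.Metaspace().decode(["▁Hello", "▁world"])
+#     # expected roughly: "Hello world"
+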
+class Replace(Decoder):
+    """
+    Replace Decoder
+
+    This decoder is to be used in tandem with the :class:`~tokenizers.pre_tokenizers.Replace`
+    :class:`~tokenizers.pre_tokenizers.PreTokenizer`.
+    """
+    def __init__(self, pattern, content):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
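+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# every occurrence of `pattern` in the tokens is replaced by `content`, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.Replace("_", " ").decode(["_Hello", "_world"])
+#     # expected roughly: " Hello world"
+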
+class Sequence(Decoder):
+    """
+    Sequence Decoder
+
+    Args:
+        decoders (:obj:`List[Decoder]`):
+            The decoders that need to be chained
+    """
+    def __init__(self, decoders):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
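+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# a chain similar to the one used for SentencePiece-style byte-fallback tokenizers, e.g.
+#
+#     from tokenizers import decoders
+#     decoder = decoders.Sequence([
+#         decoders.Replace("▁", " "),
+#         decoders.ByteFallback(),
+#         decoders.Fuse(),
+#         decoders.Strip(" ", 1, 0),
+#     ])
+#     decoder.decode(["▁Hello", "▁world"])
+#     # expected roughly: "Hello world"
+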
+class Strip(Decoder):
+    """
+    Strip Decoder
+    Strips up to `left` characters from the beginning of each token and up to
+    `right` characters from the end.
+    """
+    def __init__(self, content, left=0, right=0):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
+
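+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# here one leading `▁` character is stripped from each token, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.Strip("▁", 1, 0).decode(["▁Hello", "▁World"])
+#     # expected roughly: "HelloWorld"
+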
+class WordPiece(Decoder):
+    """
+    WordPiece Decoder
+
+    Args:
+        prefix (:obj:`str`, `optional`, defaults to :obj:`##`):
+            The prefix to use for subwords that are not a beginning-of-word
+
+        cleanup (:obj:`bool`, `optional`, defaults to :obj:`True`):
+            Whether to clean up some tokenization artifacts, mainly spaces before punctuation
+            and some abbreviated English forms.
+    """
+    def __init__(self, prefix="##", cleanup=True):
+        pass
+
+    def decode(self, tokens):
+        """
+        Decode the given list of tokens to a final string
+
+        Args:
+            tokens (:obj:`List[str]`):
+                The list of tokens to decode
+
+        Returns:
+            :obj:`str`: The decoded string
+        """
+        pass
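+
+# Illustrative usage sketch (assuming `tokenizers` is installed; output approximate):
+# the "##" continuation prefix is removed and the pieces are merged back into words, e.g.
+#
+#     from tokenizers import decoders
+#     decoders.WordPiece().decode(["Hell", "##o", ",", "world", "!"])
+#     # expected roughly: "Hello, world!"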