about summary refs log tree commit diff
path: root/.venv/lib/python3.12/site-packages/huggingface_hub/serialization/_tensorflow.py
diff options
context:
space:
mode:
Diffstat (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/serialization/_tensorflow.py')
-rw-r--r--.venv/lib/python3.12/site-packages/huggingface_hub/serialization/_tensorflow.py95
1 files changed, 95 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/serialization/_tensorflow.py b/.venv/lib/python3.12/site-packages/huggingface_hub/serialization/_tensorflow.py
new file mode 100644
index 00000000..59ed8110
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/huggingface_hub/serialization/_tensorflow.py
@@ -0,0 +1,95 @@
+# Copyright 2024 The HuggingFace Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Contains tensorflow-specific helpers."""
+
+import math
+import re
+from typing import TYPE_CHECKING, Dict, Union
+
+from .. import constants
+from ._base import MAX_SHARD_SIZE, StateDictSplit, split_state_dict_into_shards_factory
+
+
+if TYPE_CHECKING:
+    import tensorflow as tf
+
+
+def split_tf_state_dict_into_shards(
+    state_dict: Dict[str, "tf.Tensor"],
+    *,
+    filename_pattern: str = constants.TF2_WEIGHTS_FILE_PATTERN,
+    max_shard_size: Union[int, str] = MAX_SHARD_SIZE,
+) -> StateDictSplit:
+    """
+    Split a model state dictionary in shards so that each shard is smaller than a given size.
+
+    The shards are determined by iterating through the `state_dict` in the order of its keys. There is no optimization
+    made to make each shard as close as possible to the maximum size passed. For example, if the limit is 10GB and we
+    have tensors of sizes [6GB, 6GB, 2GB, 6GB, 2GB, 2GB] they will get sharded as [6GB], [6+2GB], [6+2+2GB] and not
+    [6+2+2GB], [6+2GB], [6GB].
+
+    <Tip warning={true}>
+
+    If one of the model's tensor is bigger than `max_shard_size`, it will end up in its own shard which will have a
+    size greater than `max_shard_size`.
+
+    </Tip>
+
+    Args:
+        state_dict (`Dict[str, Tensor]`):
+            The state dictionary to save.
+        filename_pattern (`str`, *optional*):
+            The pattern to generate the files names in which the model will be saved. Pattern must be a string that
+            can be formatted with `filename_pattern.format(suffix=...)` and must contain the keyword `suffix`
+            Defaults to `"tf_model{suffix}.h5"`.
+        max_shard_size (`int` or `str`, *optional*):
+            The maximum size of each shard, in bytes. Defaults to 5GB.
+
+    Returns:
+        [`StateDictSplit`]: A `StateDictSplit` object containing the shards and the index to retrieve them.
+    """
+    return split_state_dict_into_shards_factory(
+        state_dict,
+        max_shard_size=max_shard_size,
+        filename_pattern=filename_pattern,
+        get_storage_size=get_tf_storage_size,
+    )
+
+
+def get_tf_storage_size(tensor: "tf.Tensor") -> int:
+    # Return `math.ceil` since dtype byte size can be a float (e.g., 0.125 for tf.bool).
+    # Better to overestimate than underestimate.
+    return math.ceil(tensor.numpy().size * _dtype_byte_size_tf(tensor.dtype))
+
+
+def _dtype_byte_size_tf(dtype) -> float:
+    """
+    Returns the size (in bytes) occupied by one parameter of type `dtype`.
+    Taken from https://github.com/huggingface/transformers/blob/74d9d0cebb0263a3f8ab9c280569170cc74651d0/src/transformers/modeling_tf_utils.py#L608.
+    NOTE: why not `tensor.numpy().nbytes`?
+    Example:
+    ```py
+    >>> _dtype_byte_size(tf.float32)
+    4
+    ```
+    """
+    import tensorflow as tf
+
+    if dtype == tf.bool:
+        return 1 / 8
+    bit_search = re.search(r"[^\d](\d+)$", dtype.name)
+    if bit_search is None:
+        raise ValueError(f"`dtype` is not a valid dtype: {dtype}.")
+    bit_size = int(bit_search.groups()[0])
+    return bit_size // 8