From 4a52a71956a8d46fcb7294ac71734504bb09bcc2 Mon Sep 17 00:00:00 2001 From: S. Solomon Darnell Date: Fri, 28 Mar 2025 21:52:21 -0500 Subject: two version of R2R are here --- .../site-packages/huggingface_hub/repocard_data.py | 750 +++++++++++++++++++++ 1 file changed, 750 insertions(+) create mode 100644 .venv/lib/python3.12/site-packages/huggingface_hub/repocard_data.py (limited to '.venv/lib/python3.12/site-packages/huggingface_hub/repocard_data.py') diff --git a/.venv/lib/python3.12/site-packages/huggingface_hub/repocard_data.py b/.venv/lib/python3.12/site-packages/huggingface_hub/repocard_data.py new file mode 100644 index 00000000..18366ca1 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/huggingface_hub/repocard_data.py @@ -0,0 +1,750 @@ +import copy +from collections import defaultdict +from dataclasses import dataclass +from typing import Any, Dict, List, Optional, Tuple, Union + +from huggingface_hub.utils import logging, yaml_dump + + +logger = logging.get_logger(__name__) + + +@dataclass +class EvalResult: + """ + Flattened representation of individual evaluation results found in model-index of Model Cards. + + For more information on the model-index spec, see https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1. + + Args: + task_type (`str`): + The task identifier. Example: "image-classification". + dataset_type (`str`): + The dataset identifier. Example: "common_voice". Use dataset id from https://hf.co/datasets. + dataset_name (`str`): + A pretty name for the dataset. Example: "Common Voice (French)". + metric_type (`str`): + The metric identifier. Example: "wer". Use metric id from https://hf.co/metrics. + metric_value (`Any`): + The metric value. Example: 0.9 or "20.0 ± 1.2". + task_name (`str`, *optional*): + A pretty name for the task. Example: "Speech Recognition". + dataset_config (`str`, *optional*): + The name of the dataset configuration used in `load_dataset()`. + Example: fr in `load_dataset("common_voice", "fr")`. See the `datasets` docs for more info: + https://hf.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name + dataset_split (`str`, *optional*): + The split used in `load_dataset()`. Example: "test". + dataset_revision (`str`, *optional*): + The revision (AKA Git Sha) of the dataset used in `load_dataset()`. + Example: 5503434ddd753f426f4b38109466949a1217c2bb + dataset_args (`Dict[str, Any]`, *optional*): + The arguments passed during `Metric.compute()`. Example for `bleu`: `{"max_order": 4}` + metric_name (`str`, *optional*): + A pretty name for the metric. Example: "Test WER". + metric_config (`str`, *optional*): + The name of the metric configuration used in `load_metric()`. + Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`. + See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations + metric_args (`Dict[str, Any]`, *optional*): + The arguments passed during `Metric.compute()`. Example for `bleu`: max_order: 4 + verified (`bool`, *optional*): + Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set. + verify_token (`str`, *optional*): + A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. 
+ source_name (`str`, *optional*): + The name of the source of the evaluation result. Example: "Open LLM Leaderboard". + source_url (`str`, *optional*): + The URL of the source of the evaluation result. Example: "https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard". + """ + + # Required + + # The task identifier + # Example: automatic-speech-recognition + task_type: str + + # The dataset identifier + # Example: common_voice. Use dataset id from https://hf.co/datasets + dataset_type: str + + # A pretty name for the dataset. + # Example: Common Voice (French) + dataset_name: str + + # The metric identifier + # Example: wer. Use metric id from https://hf.co/metrics + metric_type: str + + # Value of the metric. + # Example: 20.0 or "20.0 ± 1.2" + metric_value: Any + + # Optional + + # A pretty name for the task. + # Example: Speech Recognition + task_name: Optional[str] = None + + # The name of the dataset configuration used in `load_dataset()`. + # Example: fr in `load_dataset("common_voice", "fr")`. + # See the `datasets` docs for more info: + # https://huggingface.co/docs/datasets/package_reference/loading_methods#datasets.load_dataset.name + dataset_config: Optional[str] = None + + # The split used in `load_dataset()`. + # Example: test + dataset_split: Optional[str] = None + + # The revision (AKA Git Sha) of the dataset used in `load_dataset()`. + # Example: 5503434ddd753f426f4b38109466949a1217c2bb + dataset_revision: Optional[str] = None + + # The arguments passed during `Metric.compute()`. + # Example for `bleu`: max_order: 4 + dataset_args: Optional[Dict[str, Any]] = None + + # A pretty name for the metric. + # Example: Test WER + metric_name: Optional[str] = None + + # The name of the metric configuration used in `load_metric()`. + # Example: bleurt-large-512 in `load_metric("bleurt", "bleurt-large-512")`. + # See the `datasets` docs for more info: https://huggingface.co/docs/datasets/v2.1.0/en/loading#load-configurations + metric_config: Optional[str] = None + + # The arguments passed during `Metric.compute()`. + # Example for `bleu`: max_order: 4 + metric_args: Optional[Dict[str, Any]] = None + + # Indicates whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. Automatically computed by Hugging Face, do not set. + verified: Optional[bool] = None + + # A JSON Web Token that is used to verify whether the metrics originate from Hugging Face's [evaluation service](https://huggingface.co/spaces/autoevaluate/model-evaluator) or not. + verify_token: Optional[str] = None + + # The name of the source of the evaluation result. + # Example: Open LLM Leaderboard + source_name: Optional[str] = None + + # The URL of the source of the evaluation result. + # Example: https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard + source_url: Optional[str] = None + + @property + def unique_identifier(self) -> tuple: + """Returns a tuple that uniquely identifies this evaluation.""" + return ( + self.task_type, + self.dataset_type, + self.dataset_config, + self.dataset_split, + self.dataset_revision, + ) + + def is_equal_except_value(self, other: "EvalResult") -> bool: + """ + Return True if `self` and `other` describe exactly the same metric but with a + different value. 
+ """ + for key, _ in self.__dict__.items(): + if key == "metric_value": + continue + # For metrics computed by Hugging Face's evaluation service, `verify_token` is derived from `metric_value`, + # so we exclude it here in the comparison. + if key != "verify_token" and getattr(self, key) != getattr(other, key): + return False + return True + + def __post_init__(self) -> None: + if self.source_name is not None and self.source_url is None: + raise ValueError("If `source_name` is provided, `source_url` must also be provided.") + + +@dataclass +class CardData: + """Structure containing metadata from a RepoCard. + + [`CardData`] is the parent class of [`ModelCardData`] and [`DatasetCardData`]. + + Metadata can be exported as a dictionary or YAML. Export can be customized to alter the representation of the data + (example: flatten evaluation results). `CardData` behaves as a dictionary (can get, pop, set values) but do not + inherit from `dict` to allow this export step. + """ + + def __init__(self, ignore_metadata_errors: bool = False, **kwargs): + self.__dict__.update(kwargs) + + def to_dict(self): + """Converts CardData to a dict. + + Returns: + `dict`: CardData represented as a dictionary ready to be dumped to a YAML + block for inclusion in a README.md file. + """ + + data_dict = copy.deepcopy(self.__dict__) + self._to_dict(data_dict) + return {key: value for key, value in data_dict.items() if value is not None} + + def _to_dict(self, data_dict): + """Use this method in child classes to alter the dict representation of the data. Alter the dict in-place. + + Args: + data_dict (`dict`): The raw dict representation of the card data. + """ + pass + + def to_yaml(self, line_break=None, original_order: Optional[List[str]] = None) -> str: + """Dumps CardData to a YAML block for inclusion in a README.md file. + + Args: + line_break (str, *optional*): + The line break to use when dumping to yaml. + + Returns: + `str`: CardData represented as a YAML block. + """ + if original_order: + self.__dict__ = { + k: self.__dict__[k] + for k in original_order + list(set(self.__dict__.keys()) - set(original_order)) + if k in self.__dict__ + } + return yaml_dump(self.to_dict(), sort_keys=False, line_break=line_break).strip() + + def __repr__(self): + return repr(self.__dict__) + + def __str__(self): + return self.to_yaml() + + def get(self, key: str, default: Any = None) -> Any: + """Get value for a given metadata key.""" + value = self.__dict__.get(key) + return default if value is None else value + + def pop(self, key: str, default: Any = None) -> Any: + """Pop value for a given metadata key.""" + return self.__dict__.pop(key, default) + + def __getitem__(self, key: str) -> Any: + """Get value for a given metadata key.""" + return self.__dict__[key] + + def __setitem__(self, key: str, value: Any) -> None: + """Set value for a given metadata key.""" + self.__dict__[key] = value + + def __contains__(self, key: str) -> bool: + """Check if a given metadata key is set.""" + return key in self.__dict__ + + def __len__(self) -> int: + """Return the number of metadata keys set.""" + return len(self.__dict__) + + +class ModelCardData(CardData): + """Model Card Metadata that is used by Hugging Face Hub when included at the top of your README.md + + Args: + base_model (`str` or `List[str]`, *optional*): + The identifier of the base model from which the model derives. This is applicable for example if your model is a + fine-tune or adapter of an existing model. 
The value must be the ID of a model on the Hub (or a list of IDs + if your model derives from multiple models). Defaults to None. + datasets (`Union[str, List[str]]`, *optional*): + Dataset or list of datasets that were used to train this model. Should be a dataset ID + found on https://hf.co/datasets. Defaults to None. + eval_results (`Union[List[EvalResult], EvalResult]`, *optional*): + List of `huggingface_hub.EvalResult` that define evaluation results of the model. If provided, + `model_name` is used to as a name on PapersWithCode's leaderboards. Defaults to `None`. + language (`Union[str, List[str]]`, *optional*): + Language of model's training data or metadata. It must be an ISO 639-1, 639-2 or + 639-3 code (two/three letters), or a special value like "code", "multilingual". Defaults to `None`. + library_name (`str`, *optional*): + Name of library used by this model. Example: keras or any library from + https://github.com/huggingface/huggingface.js/blob/main/packages/tasks/src/model-libraries.ts. + Defaults to None. + license (`str`, *optional*): + License of this model. Example: apache-2.0 or any license from + https://huggingface.co/docs/hub/repositories-licenses. Defaults to None. + license_name (`str`, *optional*): + Name of the license of this model. Defaults to None. To be used in conjunction with `license_link`. + Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a name. In that case, use `license` instead. + license_link (`str`, *optional*): + Link to the license of this model. Defaults to None. To be used in conjunction with `license_name`. + Common licenses (Apache-2.0, MIT, CC-BY-SA-4.0) do not need a link. In that case, use `license` instead. + metrics (`List[str]`, *optional*): + List of metrics used to evaluate this model. Should be a metric name that can be found + at https://hf.co/metrics. Example: 'accuracy'. Defaults to None. + model_name (`str`, *optional*): + A name for this model. It is used along with + `eval_results` to construct the `model-index` within the card's metadata. The name + you supply here is what will be used on PapersWithCode's leaderboards. If None is provided + then the repo name is used as a default. Defaults to None. + pipeline_tag (`str`, *optional*): + The pipeline tag associated with the model. Example: "text-classification". + tags (`List[str]`, *optional*): + List of tags to add to your model that can be used when filtering on the Hugging + Face Hub. Defaults to None. + ignore_metadata_errors (`str`): + If True, errors while parsing the metadata section will be ignored. Some information might be lost during + the process. Use it at your own risk. + kwargs (`dict`, *optional*): + Additional metadata that will be added to the model card. Defaults to None. + + Example: + ```python + >>> from huggingface_hub import ModelCardData + >>> card_data = ModelCardData( + ... language="en", + ... license="mit", + ... library_name="timm", + ... tags=['image-classification', 'resnet'], + ... 
) + >>> card_data.to_dict() + {'language': 'en', 'license': 'mit', 'library_name': 'timm', 'tags': ['image-classification', 'resnet']} + + ``` + """ + + def __init__( + self, + *, + base_model: Optional[Union[str, List[str]]] = None, + datasets: Optional[Union[str, List[str]]] = None, + eval_results: Optional[List[EvalResult]] = None, + language: Optional[Union[str, List[str]]] = None, + library_name: Optional[str] = None, + license: Optional[str] = None, + license_name: Optional[str] = None, + license_link: Optional[str] = None, + metrics: Optional[List[str]] = None, + model_name: Optional[str] = None, + pipeline_tag: Optional[str] = None, + tags: Optional[List[str]] = None, + ignore_metadata_errors: bool = False, + **kwargs, + ): + self.base_model = base_model + self.datasets = datasets + self.eval_results = eval_results + self.language = language + self.library_name = library_name + self.license = license + self.license_name = license_name + self.license_link = license_link + self.metrics = metrics + self.model_name = model_name + self.pipeline_tag = pipeline_tag + self.tags = _to_unique_list(tags) + + model_index = kwargs.pop("model-index", None) + if model_index: + try: + model_name, eval_results = model_index_to_eval_results(model_index) + self.model_name = model_name + self.eval_results = eval_results + except (KeyError, TypeError) as error: + if ignore_metadata_errors: + logger.warning("Invalid model-index. Not loading eval results into CardData.") + else: + raise ValueError( + f"Invalid `model_index` in metadata cannot be parsed: {error.__class__} {error}. Pass" + " `ignore_metadata_errors=True` to ignore this error while loading a Model Card. Warning:" + " some information will be lost. Use it at your own risk." + ) + + super().__init__(**kwargs) + + if self.eval_results: + if isinstance(self.eval_results, EvalResult): + self.eval_results = [self.eval_results] + if self.model_name is None: + raise ValueError("Passing `eval_results` requires `model_name` to be set.") + + def _to_dict(self, data_dict): + """Format the internal data dict. In this case, we convert eval results to a valid model index""" + if self.eval_results is not None: + data_dict["model-index"] = eval_results_to_model_index(self.model_name, self.eval_results) + del data_dict["eval_results"], data_dict["model_name"] + + +class DatasetCardData(CardData): + """Dataset Card Metadata that is used by Hugging Face Hub when included at the top of your README.md + + Args: + language (`List[str]`, *optional*): + Language of dataset's data or metadata. It must be an ISO 639-1, 639-2 or + 639-3 code (two/three letters), or a special value like "code", "multilingual". + license (`Union[str, List[str]]`, *optional*): + License(s) of this dataset. Example: apache-2.0 or any license from + https://huggingface.co/docs/hub/repositories-licenses. + annotations_creators (`Union[str, List[str]]`, *optional*): + How the annotations for the dataset were created. + Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'no-annotation', 'other'. + language_creators (`Union[str, List[str]]`, *optional*): + How the text-based data in the dataset was created. + Options are: 'found', 'crowdsourced', 'expert-generated', 'machine-generated', 'other' + multilinguality (`Union[str, List[str]]`, *optional*): + Whether the dataset is multilingual. + Options are: 'monolingual', 'multilingual', 'translation', 'other'. + size_categories (`Union[str, List[str]]`, *optional*): + The number of examples in the dataset. 
Options are: 'n<1K', '1K1T', and 'other'. + source_datasets (`List[str]]`, *optional*): + Indicates whether the dataset is an original dataset or extended from another existing dataset. + Options are: 'original' and 'extended'. + task_categories (`Union[str, List[str]]`, *optional*): + What categories of task does the dataset support? + task_ids (`Union[str, List[str]]`, *optional*): + What specific tasks does the dataset support? + paperswithcode_id (`str`, *optional*): + ID of the dataset on PapersWithCode. + pretty_name (`str`, *optional*): + A more human-readable name for the dataset. (ex. "Cats vs. Dogs") + train_eval_index (`Dict`, *optional*): + A dictionary that describes the necessary spec for doing evaluation on the Hub. + If not provided, it will be gathered from the 'train-eval-index' key of the kwargs. + config_names (`Union[str, List[str]]`, *optional*): + A list of the available dataset configs for the dataset. + """ + + def __init__( + self, + *, + language: Optional[Union[str, List[str]]] = None, + license: Optional[Union[str, List[str]]] = None, + annotations_creators: Optional[Union[str, List[str]]] = None, + language_creators: Optional[Union[str, List[str]]] = None, + multilinguality: Optional[Union[str, List[str]]] = None, + size_categories: Optional[Union[str, List[str]]] = None, + source_datasets: Optional[List[str]] = None, + task_categories: Optional[Union[str, List[str]]] = None, + task_ids: Optional[Union[str, List[str]]] = None, + paperswithcode_id: Optional[str] = None, + pretty_name: Optional[str] = None, + train_eval_index: Optional[Dict] = None, + config_names: Optional[Union[str, List[str]]] = None, + ignore_metadata_errors: bool = False, + **kwargs, + ): + self.annotations_creators = annotations_creators + self.language_creators = language_creators + self.language = language + self.license = license + self.multilinguality = multilinguality + self.size_categories = size_categories + self.source_datasets = source_datasets + self.task_categories = task_categories + self.task_ids = task_ids + self.paperswithcode_id = paperswithcode_id + self.pretty_name = pretty_name + self.config_names = config_names + + # TODO - maybe handle this similarly to EvalResult? + self.train_eval_index = train_eval_index or kwargs.pop("train-eval-index", None) + super().__init__(**kwargs) + + def _to_dict(self, data_dict): + data_dict["train-eval-index"] = data_dict.pop("train_eval_index") + + +class SpaceCardData(CardData): + """Space Card Metadata that is used by Hugging Face Hub when included at the top of your README.md + + To get an exhaustive reference of Spaces configuration, please visit https://huggingface.co/docs/hub/spaces-config-reference#spaces-configuration-reference. + + Args: + title (`str`, *optional*) + Title of the Space. + sdk (`str`, *optional*) + SDK of the Space (one of `gradio`, `streamlit`, `docker`, or `static`). + sdk_version (`str`, *optional*) + Version of the used SDK (if Gradio/Streamlit sdk). + python_version (`str`, *optional*) + Python version used in the Space (if Gradio/Streamlit sdk). + app_file (`str`, *optional*) + Path to your main application file (which contains either gradio or streamlit Python code, or static html code). + Path is relative to the root of the repository. + app_port (`str`, *optional*) + Port on which your application is running. Used only if sdk is `docker`. + license (`str`, *optional*) + License of this model. Example: apache-2.0 or any license from + https://huggingface.co/docs/hub/repositories-licenses. 
+ duplicated_from (`str`, *optional*) + ID of the original Space if this is a duplicated Space. + models (List[`str`], *optional*) + List of models related to this Space. Should be a dataset ID found on https://hf.co/models. + datasets (`List[str]`, *optional*) + List of datasets related to this Space. Should be a dataset ID found on https://hf.co/datasets. + tags (`List[str]`, *optional*) + List of tags to add to your Space that can be used when filtering on the Hub. + ignore_metadata_errors (`str`): + If True, errors while parsing the metadata section will be ignored. Some information might be lost during + the process. Use it at your own risk. + kwargs (`dict`, *optional*): + Additional metadata that will be added to the space card. + + Example: + ```python + >>> from huggingface_hub import SpaceCardData + >>> card_data = SpaceCardData( + ... title="Dreambooth Training", + ... license="mit", + ... sdk="gradio", + ... duplicated_from="multimodalart/dreambooth-training" + ... ) + >>> card_data.to_dict() + {'title': 'Dreambooth Training', 'sdk': 'gradio', 'license': 'mit', 'duplicated_from': 'multimodalart/dreambooth-training'} + ``` + """ + + def __init__( + self, + *, + title: Optional[str] = None, + sdk: Optional[str] = None, + sdk_version: Optional[str] = None, + python_version: Optional[str] = None, + app_file: Optional[str] = None, + app_port: Optional[int] = None, + license: Optional[str] = None, + duplicated_from: Optional[str] = None, + models: Optional[List[str]] = None, + datasets: Optional[List[str]] = None, + tags: Optional[List[str]] = None, + ignore_metadata_errors: bool = False, + **kwargs, + ): + self.title = title + self.sdk = sdk + self.sdk_version = sdk_version + self.python_version = python_version + self.app_file = app_file + self.app_port = app_port + self.license = license + self.duplicated_from = duplicated_from + self.models = models + self.datasets = datasets + self.tags = _to_unique_list(tags) + super().__init__(**kwargs) + + +def model_index_to_eval_results(model_index: List[Dict[str, Any]]) -> Tuple[str, List[EvalResult]]: + """Takes in a model index and returns the model name and a list of `huggingface_hub.EvalResult` objects. + + A detailed spec of the model index can be found here: + https://github.com/huggingface/hub-docs/blob/main/modelcard.md?plain=1 + + Args: + model_index (`List[Dict[str, Any]]`): + A model index data structure, likely coming from a README.md file on the + Hugging Face Hub. + + Returns: + model_name (`str`): + The name of the model as found in the model index. This is used as the + identifier for the model on leaderboards like PapersWithCode. + eval_results (`List[EvalResult]`): + A list of `huggingface_hub.EvalResult` objects containing the metrics + reported in the provided model_index. + + Example: + ```python + >>> from huggingface_hub.repocard_data import model_index_to_eval_results + >>> # Define a minimal model index + >>> model_index = [ + ... { + ... "name": "my-cool-model", + ... "results": [ + ... { + ... "task": { + ... "type": "image-classification" + ... }, + ... "dataset": { + ... "type": "beans", + ... "name": "Beans" + ... }, + ... "metrics": [ + ... { + ... "type": "accuracy", + ... "value": 0.9 + ... } + ... ] + ... } + ... ] + ... } + ... 
] + >>> model_name, eval_results = model_index_to_eval_results(model_index) + >>> model_name + 'my-cool-model' + >>> eval_results[0].task_type + 'image-classification' + >>> eval_results[0].metric_type + 'accuracy' + + ``` + """ + + eval_results = [] + for elem in model_index: + name = elem["name"] + results = elem["results"] + for result in results: + task_type = result["task"]["type"] + task_name = result["task"].get("name") + dataset_type = result["dataset"]["type"] + dataset_name = result["dataset"]["name"] + dataset_config = result["dataset"].get("config") + dataset_split = result["dataset"].get("split") + dataset_revision = result["dataset"].get("revision") + dataset_args = result["dataset"].get("args") + source_name = result.get("source", {}).get("name") + source_url = result.get("source", {}).get("url") + + for metric in result["metrics"]: + metric_type = metric["type"] + metric_value = metric["value"] + metric_name = metric.get("name") + metric_args = metric.get("args") + metric_config = metric.get("config") + verified = metric.get("verified") + verify_token = metric.get("verifyToken") + + eval_result = EvalResult( + task_type=task_type, # Required + dataset_type=dataset_type, # Required + dataset_name=dataset_name, # Required + metric_type=metric_type, # Required + metric_value=metric_value, # Required + task_name=task_name, + dataset_config=dataset_config, + dataset_split=dataset_split, + dataset_revision=dataset_revision, + dataset_args=dataset_args, + metric_name=metric_name, + metric_args=metric_args, + metric_config=metric_config, + verified=verified, + verify_token=verify_token, + source_name=source_name, + source_url=source_url, + ) + eval_results.append(eval_result) + return name, eval_results + + +def _remove_none(obj): + """ + Recursively remove `None` values from a dict. Borrowed from: https://stackoverflow.com/a/20558778 + """ + if isinstance(obj, (list, tuple, set)): + return type(obj)(_remove_none(x) for x in obj if x is not None) + elif isinstance(obj, dict): + return type(obj)((_remove_none(k), _remove_none(v)) for k, v in obj.items() if k is not None and v is not None) + else: + return obj + + +def eval_results_to_model_index(model_name: str, eval_results: List[EvalResult]) -> List[Dict[str, Any]]: + """Takes in given model name and list of `huggingface_hub.EvalResult` and returns a + valid model-index that will be compatible with the format expected by the + Hugging Face Hub. + + Args: + model_name (`str`): + Name of the model (ex. "my-cool-model"). This is used as the identifier + for the model on leaderboards like PapersWithCode. + eval_results (`List[EvalResult]`): + List of `huggingface_hub.EvalResult` objects containing the metrics to be + reported in the model-index. + + Returns: + model_index (`List[Dict[str, Any]]`): The eval_results converted to a model-index. + + Example: + ```python + >>> from huggingface_hub.repocard_data import eval_results_to_model_index, EvalResult + >>> # Define minimal eval_results + >>> eval_results = [ + ... EvalResult( + ... task_type="image-classification", # Required + ... dataset_type="beans", # Required + ... dataset_name="Beans", # Required + ... metric_type="accuracy", # Required + ... metric_value=0.9, # Required + ... ) + ... 
] + >>> eval_results_to_model_index("my-cool-model", eval_results) + [{'name': 'my-cool-model', 'results': [{'task': {'type': 'image-classification'}, 'dataset': {'name': 'Beans', 'type': 'beans'}, 'metrics': [{'type': 'accuracy', 'value': 0.9}]}]}] + + ``` + """ + + # Metrics are reported on a unique task-and-dataset basis. + # Here, we make a map of those pairs and the associated EvalResults. + task_and_ds_types_map: Dict[Any, List[EvalResult]] = defaultdict(list) + for eval_result in eval_results: + task_and_ds_types_map[eval_result.unique_identifier].append(eval_result) + + # Use the map from above to generate the model index data. + model_index_data = [] + for results in task_and_ds_types_map.values(): + # All items from `results` share same metadata + sample_result = results[0] + data = { + "task": { + "type": sample_result.task_type, + "name": sample_result.task_name, + }, + "dataset": { + "name": sample_result.dataset_name, + "type": sample_result.dataset_type, + "config": sample_result.dataset_config, + "split": sample_result.dataset_split, + "revision": sample_result.dataset_revision, + "args": sample_result.dataset_args, + }, + "metrics": [ + { + "type": result.metric_type, + "value": result.metric_value, + "name": result.metric_name, + "config": result.metric_config, + "args": result.metric_args, + "verified": result.verified, + "verifyToken": result.verify_token, + } + for result in results + ], + } + if sample_result.source_url is not None: + source = { + "url": sample_result.source_url, + } + if sample_result.source_name is not None: + source["name"] = sample_result.source_name + data["source"] = source + model_index_data.append(data) + + # TODO - Check if there cases where this list is longer than one? + # Finally, the model index itself is list of dicts. + model_index = [ + { + "name": model_name, + "results": model_index_data, + } + ] + return _remove_none(model_index) + + +def _to_unique_list(tags: Optional[List[str]]) -> Optional[List[str]]: + if tags is None: + return tags + unique_tags = [] # make tags unique + keep order explicitly + for tag in tags: + if tag not in unique_tags: + unique_tags.append(tag) + return unique_tags -- cgit v1.2.3
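
The patch above vendors `repocard_data.py` wholesale, and its `ModelCardData` docstring only shows the class without evaluation results. As a minimal usage sketch (not part of the patch), the snippet below shows the documented constraint that `eval_results` requires `model_name`, plus the dict-like access that `CardData` provides without inheriting from `dict`. It assumes the top-level `huggingface_hub` package re-exports `ModelCardData` and `EvalResult` as the docstrings suggest; `"my-cool-model"`, `"beans"`, and the metric value are placeholders.

```python
from huggingface_hub import EvalResult, ModelCardData

# Illustrative values only; "my-cool-model", "beans", etc. are placeholders.
results = [
    EvalResult(
        task_type="image-classification",
        dataset_type="beans",
        dataset_name="Beans",
        metric_type="accuracy",
        metric_value=0.9,
    )
]

# Passing `eval_results` without `model_name` raises a ValueError (see __init__ above).
card_data = ModelCardData(
    model_name="my-cool-model",
    eval_results=results,
    language="en",
    license="mit",
    tags=["image-classification"],
)

# CardData behaves like a dict (get/pop/contains) even though it does not inherit from `dict`.
assert "license" in card_data
assert card_data.get("language") == "en"

# `to_dict()` folds `model_name` and `eval_results` into a `model-index` entry.
print(card_data.to_dict()["model-index"][0]["name"])  # my-cool-model
```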
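Unlike `ModelCardData` and `SpaceCardData`, the `DatasetCardData` docstring carries no example, so here is a small sketch under the same assumptions (top-level re-export, placeholder names). Note that `_to_dict` renames `train_eval_index` to the `train-eval-index` YAML key, and `to_dict()` drops `None` values, so unset fields do not appear in the dumped block.

```python
from huggingface_hub import DatasetCardData

card_data = DatasetCardData(
    language="en",
    license="mit",
    pretty_name="Beans",                      # placeholder dataset name
    task_categories=["image-classification"],
    size_categories=["n<1K"],
)

# Unset fields (including train_eval_index) are filtered out before YAML dumping.
print(card_data.to_yaml())
```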
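Finally, the two module-level helpers are inverses of each other on the fields they both carry, which is what `ModelCardData` relies on when parsing a `model-index` from README metadata. A rough round-trip sketch, importing directly from `huggingface_hub.repocard_data` as the docstring examples do (the model name is again a placeholder):

```python
from huggingface_hub.repocard_data import (
    EvalResult,
    eval_results_to_model_index,
    model_index_to_eval_results,
)

original = EvalResult(
    task_type="image-classification",
    dataset_type="beans",
    dataset_name="Beans",
    metric_type="accuracy",
    metric_value=0.9,
)

# Forward: EvalResult objects -> model-index (None fields stripped by _remove_none).
model_index = eval_results_to_model_index("my-cool-model", [original])

# Backward: model-index -> (model name, list of EvalResult).
name, parsed = model_index_to_eval_results(model_index)

assert name == "my-cool-model"
# Same task/dataset/metric identity; only the metric value is allowed to differ.
assert parsed[0].is_equal_except_value(original)
```

Because `unique_identifier` groups results by task and dataset, several `EvalResult` objects that share those fields are merged into a single `results` entry with multiple metrics when converted back to a model-index.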