diff options
| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch) | |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input | |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 (diff) | |
| download | gn-ai-master.tar.gz | |
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input')
4 files changed, 145 insertions, 0 deletions
# NOTE: the "diff --git" / "index" / "@@" comment lines below are the patch headers of the
# original commit view, kept verbatim to mark where each added file starts.

# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/__init__.py
# new file mode 100644
# index 00000000..fdf8caba
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/__init__.py
# @@ -0,0 +1,5 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

__path__ = __import__("pkgutil").extend_path(__path__, __name__)

# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_ai_search_config.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_ai_search_config.py
# new file mode 100644
# index 00000000..b2163c40
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_ai_search_config.py
# @@ -0,0 +1,31 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

# General todo: need to determine which args are required or optional when parsed out into groups like this.
# General todo: move these to more permanent locations?

# Defines stuff related to the resulting created index, like the index type.

from typing import Optional
from azure.ai.ml._utils._experimental import experimental


@experimental
class AzureAISearchConfig:
    """Config class for creating an Azure AI Search index.

    :param index_name: The name of the Azure AI Search index.
    :type index_name: Optional[str]
    :param connection_id: The Azure AI Search connection ID.
    :type connection_id: Optional[str]
    """

    def __init__(
        self,
        *,
        index_name: Optional[str] = None,
        connection_id: Optional[str] = None,
    ) -> None:
        # Both values are optional and stored unchanged; nothing is validated here.
        self.index_name = index_name
        self.connection_id = connection_id


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_index_config.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_index_config.py
# new file mode 100644
# index 00000000..0eec691a
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_index_config.py
# @@ -0,0 +1,47 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

from typing import Optional


class IndexConfig:  # pylint: disable=too-many-instance-attributes
    """Convenience class that contains all config values for index creation that are
    NOT specific to the index source data or the created index type. Meant for internal use only
    to simplify function headers. The user entry point is a function that
    should still contain all the fields in this class as individual function parameters.

    All arguments are keyword-only. ``output_index_name``, ``vector_store``,
    ``embeddings_model`` and ``aoai_connection_id`` are required; every other argument
    has a default.

    Params omitted for brevity and to avoid maintaining duplicate docs. See index creation function
    for actual parameter descriptions.
    """

    def __init__(
        self,
        *,
        output_index_name: str,
        vector_store: str,
        data_source_url: Optional[str] = None,
        chunk_size: Optional[int] = None,
        chunk_overlap: Optional[int] = None,
        input_glob: Optional[str] = None,
        max_sample_files: Optional[int] = None,
        chunk_prepend_summary: Optional[bool] = None,
        document_path_replacement_regex: Optional[str] = None,
        embeddings_container: Optional[str] = None,
        embeddings_model: str,
        aoai_connection_id: str,
        _dry_run: bool = False,
    ) -> None:
        # Plain value holder: every argument is stored under the attribute of the
        # same name, unchanged and unvalidated.
        self.output_index_name = output_index_name
        self.vector_store = vector_store
        self.data_source_url = data_source_url
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.input_glob = input_glob
        self.max_sample_files = max_sample_files
        self.chunk_prepend_summary = chunk_prepend_summary
        self.document_path_replacement_regex = document_path_replacement_regex
        self.embeddings_container = embeddings_container
        self.embeddings_model = embeddings_model
        self.aoai_connection_id = aoai_connection_id
        self._dry_run = _dry_run


# diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_index_data_source.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_index_data_source.py
# new file mode 100644
# index 00000000..92b62b6b
# --- /dev/null
# +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_indexes/input/_index_data_source.py
# @@ -0,0 +1,62 @@
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------
from typing import Union

from azure.ai.ml._utils._experimental import experimental
from azure.ai.ml.entities._inputs_outputs import Input
from azure.ai.ml.constants._common import IndexInputType


# General todo: need to determine which args are required or optional when parsed out into groups like this.
# General todo: move these to more permanent locations?
# Defines stuff related to supplying inputs for an index AKA the base data.
@experimental
class IndexDataSource:
    """Base class for configs that define data that will be processed into an ML index.
    This class should not be instantiated directly. Use one of its child classes instead.

    :param input_type: A type enum describing the source of the index. Used to avoid
        direct type checking.
    :type input_type: Union[str, ~azure.ai.ml.constants._common.IndexInputType]
    """

    def __init__(self, *, input_type: Union[str, IndexInputType]) -> None:
        self.input_type = input_type


# Field bundle for creating an index from files located in a Git repo.
# TODO Does git_url need to specifically be an SSH or HTTPS style link?
# TODO What is git connection id?
@experimental
class GitSource(IndexDataSource):
    """Config class for creating an ML index from files located in a git repository.

    :param url: A link to the repository to use.
    :type url: str
    :param branch_name: The name of the branch to use from the target repository.
    :type branch_name: str
    :param connection_id: The connection ID for GitHub.
    :type connection_id: str
    """

    def __init__(self, *, url: str, branch_name: str, connection_id: str) -> None:
        self.url = url
        self.branch_name = branch_name
        self.connection_id = connection_id
        # The base class only records the source kind; this class always reports GIT.
        super().__init__(input_type=IndexInputType.GIT)


@experimental
class LocalSource(IndexDataSource):
    """Config class for creating an ML index from a collection of local files.

    :param input_data: The local location of the index source files. It is passed as the
        ``path`` of a ``uri_folder`` :class:`~azure.ai.ml.Input`, which is what gets stored
        on the ``input_data`` attribute.
    :type input_data: str
    """

    def __init__(self, *, input_data: str) -> None:  # todo Make sure type of input_data is correct
        # Store an Input object rather than the raw string the caller supplied.
        self.input_data = Input(type="uri_folder", path=input_data)
        super().__init__(input_type=IndexInputType.LOCAL)
