author     S. Solomon Darnell   2025-03-28 21:52:21 -0500
committer  S. Solomon Darnell   2025-03-28 21:52:21 -0500
commit     4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree       ee3dc5af3b6313e921cd920906356f5d4febc4ed /.venv/lib/python3.12/site-packages/azure/ai/ml/automl
parent     cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
download   gn-ai-master.tar.gz
Two versions of R2R are here (HEAD, master)
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/automl')
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py          144
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py     298
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py       175
-rw-r--r--  .venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py   445
4 files changed, 1062 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py
new file mode 100644
index 00000000..d34e30a8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py
@@ -0,0 +1,144 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Contains automated machine learning classes for Azure Machine Learning SDKv2.
+
+Main areas include managing AutoML tasks.
+"""
+from azure.ai.ml.entities._job.automl import TrainingSettings
+from azure.ai.ml.entities._job.automl.image import (
+ LogTrainingMetrics,
+ LogValidationLoss,
+ ImageClassificationJob,
+ ImageClassificationMultilabelJob,
+ ImageClassificationSearchSpace,
+ ImageInstanceSegmentationJob,
+ ImageLimitSettings,
+ ImageModelSettingsClassification,
+ ImageModelSettingsObjectDetection,
+ ImageObjectDetectionJob,
+ ImageObjectDetectionSearchSpace,
+ ImageSweepSettings,
+)
+from azure.ai.ml.entities._job.automl.nlp import (
+ NlpFeaturizationSettings,
+ NlpFixedParameters,
+ NlpLimitSettings,
+ NlpSearchSpace,
+ NlpSweepSettings,
+ TextClassificationJob,
+ TextClassificationMultilabelJob,
+ TextNerJob,
+)
+from azure.ai.ml.entities._job.automl.search_space import SearchSpace
+from azure.ai.ml.entities._job.automl.stack_ensemble_settings import (
+ StackEnsembleSettings,
+)
+from azure.ai.ml.entities._job.automl.tabular import (
+ ClassificationJob,
+ ColumnTransformer,
+ ForecastingJob,
+ ForecastingSettings,
+ RegressionJob,
+ TabularFeaturizationSettings,
+ TabularLimitSettings,
+)
+
+from .._restclient.v2023_04_01_preview.models import (
+ BlockedTransformers,
+ ClassificationModels,
+ ClassificationMultilabelPrimaryMetrics,
+ ClassificationPrimaryMetrics,
+ FeaturizationMode,
+ ForecastHorizonMode,
+ ForecastingModels,
+ ForecastingPrimaryMetrics,
+ InstanceSegmentationPrimaryMetrics,
+ LearningRateScheduler,
+ NCrossValidationsMode,
+ ObjectDetectionPrimaryMetrics,
+ RegressionModels,
+ RegressionPrimaryMetrics,
+ SamplingAlgorithmType,
+ ShortSeriesHandlingConfiguration,
+ StochasticOptimizer,
+ TargetAggregationFunction,
+ TargetLagsMode,
+ TargetRollingWindowSizeMode,
+ UseStl,
+ ValidationMetricType,
+)
+from ._automl_image import (
+ image_classification,
+ image_classification_multilabel,
+ image_instance_segmentation,
+ image_object_detection,
+)
+from ._automl_nlp import text_classification, text_classification_multilabel, text_ner
+from ._automl_tabular import classification, forecasting, regression
+
+__all__ = [
+ "ClassificationModels",
+ "RegressionModels",
+ "ForecastingModels",
+ "FeaturizationMode",
+ "NCrossValidationsMode",
+ "ForecastHorizonMode",
+ "ShortSeriesHandlingConfiguration",
+ "TargetLagsMode",
+ "TargetRollingWindowSizeMode",
+ "TargetAggregationFunction",
+ "UseStl",
+ "ClassificationPrimaryMetrics",
+ "RegressionPrimaryMetrics",
+ "ForecastingPrimaryMetrics",
+ "ClassificationMultilabelPrimaryMetrics",
+ "ObjectDetectionPrimaryMetrics",
+ "InstanceSegmentationPrimaryMetrics",
+ "ColumnTransformer",
+ "TabularFeaturizationSettings",
+ "ForecastingSettings",
+ "TabularLimitSettings",
+ "NlpFeaturizationSettings",
+ "NlpFixedParameters",
+ "NlpLimitSettings",
+ "NlpSweepSettings",
+ "NlpSearchSpace",
+ "LogTrainingMetrics",
+ "LogValidationLoss",
+ "ImageLimitSettings",
+ "ImageModelSettingsClassification",
+ "ImageModelSettingsObjectDetection",
+ "ImageSweepSettings",
+ "ImageObjectDetectionSearchSpace",
+ "ImageClassificationSearchSpace",
+ "TrainingSettings",
+ "image_classification",
+ "image_classification_multilabel",
+ "image_object_detection",
+ "image_instance_segmentation",
+ "text_classification",
+ "text_classification_multilabel",
+ "text_ner",
+ "classification",
+ "regression",
+ "forecasting",
+ "SearchSpace",
+ "StackEnsembleSettings",
+ "BlockedTransformers",
+ "ClassificationJob",
+ "ForecastingJob",
+ "RegressionJob",
+ "ImageClassificationJob",
+ "ImageClassificationMultilabelJob",
+ "ImageObjectDetectionJob",
+ "ImageInstanceSegmentationJob",
+ "LearningRateScheduler",
+ "SamplingAlgorithmType",
+ "StochasticOptimizer",
+ "TextClassificationJob",
+ "TextClassificationMultilabelJob",
+ "TextNerJob",
+ "ValidationMetricType",
+]
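
The __init__.py above makes azure.ai.ml.automl the single import surface for AutoML: it re-exports the task factory functions (classification, forecasting, image_classification, text_ner, and so on), the corresponding job entity classes, and the enums generated from the 2023-04-01-preview REST models. A minimal sketch of what that surface looks like to a caller (illustrative only; nothing in it is part of this diff):

    from azure.ai.ml.automl import (
        ClassificationPrimaryMetrics,
        classification,
        image_classification,
        text_ner,
    )

    # The re-exported enums mirror the service-side REST models, so their members
    # (or their string values) can be passed wherever a factory accepts a primary metric.
    print([metric.value for metric in ClassificationPrimaryMetrics])
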
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py
new file mode 100644
index 00000000..521c9aa2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py
@@ -0,0 +1,298 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Entrypoints for creating AutoML tasks."""
+
+from typing import Optional, TypeVar, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+ ClassificationMultilabelPrimaryMetrics,
+ ClassificationPrimaryMetrics,
+ InstanceSegmentationPrimaryMetrics,
+ ObjectDetectionPrimaryMetrics,
+)
+from azure.ai.ml.entities._builders.base_node import pipeline_node_decorator
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job.automl.image.automl_image_object_detection_base import AutoMLImageObjectDetectionBase
+from azure.ai.ml.entities._job.automl.image.image_classification_job import ImageClassificationJob
+from azure.ai.ml.entities._job.automl.image.image_classification_multilabel_job import ImageClassificationMultilabelJob
+from azure.ai.ml.entities._job.automl.image.image_instance_segmentation_job import ImageInstanceSegmentationJob
+from azure.ai.ml.entities._job.automl.image.image_object_detection_job import ImageObjectDetectionJob
+
+TImageJob = TypeVar("TImageJob", bound=AutoMLImageObjectDetectionBase)
+
+
+def _create_image_job(
+ job_cls: TImageJob,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[Union[str, ClassificationPrimaryMetrics]] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ **kwargs,
+) -> TImageJob:
+ """Helper function to create objects for AutoML Image jobs.
+
+ :param job_cls: The job class
+ :type job_cls: TImageJob
+ :param training_data: The training input data
+ :type training_data: ~azure.ai.ml.entities.Input
+ :param target_column_name: The target column name
+ :type target_column_name: str
+ :param primary_metric: The primary metric
+ :type primary_metric: Optional[Union[str, ~azure.ai.ml.automl.ClassificationPrimaryMetrics]]
+ :param validation_data: The validation data
+ :type validation_data: Optional[~azure.ai.ml.entities.Input]
+ :param validation_data_size: The validation data size
+ :type validation_data_size: Optional[float]
+ :return: An AutoML Image Job
+ :rtype: TImageJob
+ """
+ image_job = job_cls(primary_metric=primary_metric, **kwargs) # type: ignore[operator]
+ image_job.set_data(
+ training_data=training_data,
+ target_column_name=target_column_name,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ )
+
+ return image_job
+
+
+@pipeline_node_decorator
+def image_classification(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[Union[str, ClassificationPrimaryMetrics]] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ **kwargs,
+) -> ImageClassificationJob:
+    """Creates an object for an AutoML Image multi-class Classification job.
+
+ :keyword training_data: The training data to be used within the experiment.
+ :paramtype training_data: ~azure.ai.ml.entities.Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+ Acceptable values: accuracy, AUC_weighted, norm_macro_recall, average_precision_score_weighted,
+        and precision_score_weighted.
+ Defaults to accuracy.
+ :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.ClassificationPrimaryMetrics]
+ :keyword validation_data: The validation data to be used within the experiment.
+ :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+ :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+ is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+ to extract validation data out of the specified training data.
+
+        Defaults to 0.2.
+ :paramtype validation_data_size: float
+
+ :return: Image classification job object that can be submitted to an Azure ML compute for execution.
+ :rtype: ~azure.ai.ml.automl.ImageClassificationJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.image_classification]
+ :end-before: [END automl.image_classification]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl image classification job
+ """
+ return _create_image_job( # type: ignore[type-var, return-value]
+ job_cls=ImageClassificationJob,
+ training_data=training_data,
+ target_column_name=target_column_name,
+ primary_metric=primary_metric,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ **kwargs,
+ )
+
+
+@pipeline_node_decorator
+def image_classification_multilabel(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[Union[str, ClassificationMultilabelPrimaryMetrics]] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ **kwargs,
+) -> ImageClassificationMultilabelJob:
+    """Creates an object for an AutoML Image multi-label Classification job.
+
+ :keyword training_data: The training data to be used within the experiment.
+ :paramtype training_data: ~azure.ai.ml.entities.Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+ Acceptable values: accuracy, AUC_weighted, norm_macro_recall, average_precision_score_weighted,
+        precision_score_weighted, and Iou.
+ Defaults to Iou.
+ :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.ClassificationMultilabelPrimaryMetrics]
+ :keyword validation_data: The validation data to be used within the experiment.
+ :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: The fraction of the training data to hold out for validation when the user
+        does not provide validation data. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+ to extract validation data out of the specified training data.
+
+        Defaults to 0.2.
+ :paramtype validation_data_size: float
+
+ :return: Image multi-label classification job object that can be submitted to an Azure ML compute for execution.
+ :rtype: ~azure.ai.ml.automl.ImageClassificationMultilabelJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.image_classification_multilabel]
+ :end-before: [END automl.image_classification_multilabel]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl image multilabel classification job
+ """
+ return _create_image_job( # type: ignore[type-var, return-value]
+ job_cls=ImageClassificationMultilabelJob,
+ training_data=training_data,
+ target_column_name=target_column_name,
+ primary_metric=primary_metric,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ **kwargs,
+ )
+
+
+@pipeline_node_decorator
+def image_object_detection(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[Union[str, ObjectDetectionPrimaryMetrics]] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ **kwargs,
+) -> ImageObjectDetectionJob:
+    """Creates an object for an AutoML Image Object Detection job.
+
+ :keyword training_data: The training data to be used within the experiment.
+ :paramtype training_data: ~azure.ai.ml.entities.Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: MeanAveragePrecision.
+ Defaults to MeanAveragePrecision.
+ :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.ObjectDetectionPrimaryMetrics]
+ :keyword validation_data: The validation data to be used within the experiment.
+ :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: The fraction of the training data to hold out for validation when the user
+        does not provide validation data. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+ to extract validation data out of the specified training data.
+
+        Defaults to 0.2.
+ :paramtype validation_data_size: float
+
+ :return: Image object detection job object that can be submitted to an Azure ML compute for execution.
+ :rtype: ~azure.ai.ml.automl.ImageObjectDetectionJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.image_object_detection]
+ :end-before: [END automl.image_object_detection]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl image object detection job
+ """
+ return _create_image_job( # type: ignore[type-var, return-value]
+ job_cls=ImageObjectDetectionJob,
+ training_data=training_data,
+ target_column_name=target_column_name,
+ primary_metric=primary_metric,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ **kwargs,
+ )
+
+
+@pipeline_node_decorator
+def image_instance_segmentation(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[Union[str, InstanceSegmentationPrimaryMetrics]] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ **kwargs,
+) -> ImageInstanceSegmentationJob:
+    """Creates an object for an AutoML Image Instance Segmentation job.
+
+ :keyword training_data: The training data to be used within the experiment.
+ :paramtype training_data: ~azure.ai.ml.entities.Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: MeanAveragePrecision.
+ Defaults to MeanAveragePrecision.
+ :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.InstanceSegmentationPrimaryMetrics]
+ :keyword validation_data: The validation data to be used within the experiment.
+ :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: The fraction of the training data to hold out for validation when the user
+        does not provide validation data. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+ to extract validation data out of the specified training data.
+
+        Defaults to 0.2.
+ :paramtype validation_data_size: float
+
+    :return: Image instance segmentation job object that can be submitted to an Azure ML compute for execution.
+ :rtype: ~azure.ai.ml.automl.ImageInstanceSegmentationJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_image.py
+ :start-after: [START automl.image_instance_segmentation]
+ :end-before: [END automl.image_instance_segmentation]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl image instance segmentation job
+ """
+ return _create_image_job( # type: ignore[type-var, return-value]
+ job_cls=ImageInstanceSegmentationJob,
+ training_data=training_data,
+ target_column_name=target_column_name,
+ primary_metric=primary_metric,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ **kwargs,
+ )
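
Each of the four image entrypoints above only builds a job object; _create_image_job instantiates the job class and wires the data through set_data, and submission happens elsewhere. A hedged sketch of calling image_classification follows; the MLTable paths, the label column, and the set_limits call on the returned ImageClassificationJob are illustrative assumptions, not part of this diff:

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    image_job = automl.image_classification(
        training_data=Input(type=AssetTypes.MLTABLE, path="./data/training-mltable-folder"),
        validation_data=Input(type=AssetTypes.MLTABLE, path="./data/validation-mltable-folder"),
        target_column_name="label",
        primary_metric="accuracy",
    )
    # Bound the trial sweep; set_limits is assumed to be available on the image job entity.
    image_job.set_limits(timeout_minutes=60, max_trials=10, max_concurrent_trials=2)
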
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py
new file mode 100644
index 00000000..ac7ebcf6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py
@@ -0,0 +1,175 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Entrypoints for creating AutoML tasks."""
+
+from typing import Optional
+
+from azure.ai.ml.entities._builders.base_node import pipeline_node_decorator
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job.automl.nlp.text_classification_job import TextClassificationJob
+from azure.ai.ml.entities._job.automl.nlp.text_classification_multilabel_job import TextClassificationMultilabelJob
+from azure.ai.ml.entities._job.automl.nlp.text_ner_job import TextNerJob
+
+
+@pipeline_node_decorator
+def text_classification(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ validation_data: Input,
+ primary_metric: Optional[str] = None,
+ log_verbosity: Optional[str] = None,
+ **kwargs,
+) -> TextClassificationJob:
+ """Function to create a TextClassificationJob.
+
+    A text classification job is used to train a model that can predict the class/category of text data.
+ Input training data should include a target column that classifies the text into exactly one class.
+
+ :keyword training_data: The training data to be used within the experiment.
+ It should contain both training features and a target column.
+ :paramtype training_data: Input
+ :keyword target_column_name: Name of the target column.
+ :paramtype target_column_name: str
+ :keyword validation_data: The validation data to be used within the experiment.
+ It should contain both training features and a target column.
+ :paramtype validation_data: Input
+ :keyword primary_metric: Primary metric for the task.
+ Acceptable values: accuracy, AUC_weighted, precision_score_weighted
+ :paramtype primary_metric: Union[str, ClassificationPrimaryMetrics]
+ :keyword log_verbosity: Log verbosity level.
+ :paramtype log_verbosity: str
+
+ :return: The TextClassificationJob object.
+ :rtype: TextClassificationJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+ :start-after: [START automl.text_classification]
+ :end-before: [END automl.text_classification]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl text classification job
+ """
+
+ text_classification_job = TextClassificationJob(
+ primary_metric=primary_metric,
+ training_data=training_data,
+ target_column_name=target_column_name,
+ validation_data=validation_data,
+ log_verbosity=log_verbosity,
+ **kwargs,
+ )
+
+ return text_classification_job
+
+
+@pipeline_node_decorator
+def text_classification_multilabel(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ validation_data: Input,
+ primary_metric: Optional[str] = None,
+ log_verbosity: Optional[str] = None,
+ **kwargs,
+) -> TextClassificationMultilabelJob:
+ """Function to create a TextClassificationMultilabelJob.
+
+ A text classification multilabel job is used to train a model that can predict the classes/categories
+    of text data. Input training data should include a target column that classifies the text into one or more
+    classes. For more information on the format of multilabel data, refer to:
+ https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-nlp-models#multi-label
+
+ :keyword training_data: The training data to be used within the experiment.
+ It should contain both training features and a target column.
+ :paramtype training_data: Input
+ :keyword target_column_name: Name of the target column.
+ :paramtype target_column_name: str
+ :keyword validation_data: The validation data to be used within the experiment.
+ It should contain both training features and a target column.
+ :paramtype validation_data: Input
+ :keyword primary_metric: Primary metric for the task.
+ Acceptable values: accuracy
+ :paramtype primary_metric: str
+ :keyword log_verbosity: Log verbosity level.
+ :paramtype log_verbosity: str
+
+ :return: The TextClassificationMultilabelJob object.
+ :rtype: TextClassificationMultilabelJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+ :start-after: [START automl.text_classification_multilabel]
+ :end-before: [END automl.text_classification_multilabel]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl text multilabel classification job
+ """
+
+ text_classification_multilabel_job = TextClassificationMultilabelJob(
+ primary_metric=primary_metric,
+ training_data=training_data,
+ target_column_name=target_column_name,
+ validation_data=validation_data,
+ log_verbosity=log_verbosity,
+ **kwargs,
+ )
+
+ return text_classification_multilabel_job
+
+
+@pipeline_node_decorator
+def text_ner(
+ *,
+ training_data: Input,
+ validation_data: Input,
+ primary_metric: Optional[str] = None,
+ log_verbosity: Optional[str] = None,
+ **kwargs,
+) -> TextNerJob:
+ """Function to create a TextNerJob.
+
+    A text named entity recognition job is used to train a model that can predict the named entities in text.
+    Input training data should be a text file in CoNLL format. For more information on the format of text NER data,
+ refer to:
+ https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-nlp-models#named-entity-recognition-ner
+
+ :keyword training_data: The training data to be used within the experiment.
+ It should contain both training features and a target column.
+ :paramtype training_data: Input
+ :keyword validation_data: The validation data to be used within the experiment.
+ It should contain both training features and a target column.
+ :paramtype validation_data: Input
+ :keyword primary_metric: Primary metric for the task.
+ Acceptable values: accuracy
+ :paramtype primary_metric: str
+ :keyword log_verbosity: Log verbosity level.
+ :paramtype log_verbosity: str
+
+ :return: The TextNerJob object.
+ :rtype: TextNerJob
+
+ .. admonition:: Example:
+
+ .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+ :start-after: [START automl.text_ner]
+ :end-before: [END automl.text_ner]
+ :language: python
+ :dedent: 8
+ :caption: creating an automl text ner job
+ """
+
+ text_ner_job = TextNerJob(
+ primary_metric=primary_metric,
+ training_data=training_data,
+ validation_data=validation_data,
+ log_verbosity=log_verbosity,
+ **kwargs,
+ )
+
+ return text_ner_job
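
Unlike the image and tabular factories, the NLP entrypoints above take validation_data as a required keyword and pass everything straight to the job constructor rather than going through a set_data helper. A hedged usage sketch for text_classification (the data paths and target column name are placeholders, not part of this diff):

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    text_job = automl.text_classification(
        training_data=Input(type=AssetTypes.MLTABLE, path="./data/train-mltable-folder"),
        validation_data=Input(type=AssetTypes.MLTABLE, path="./data/valid-mltable-folder"),
        target_column_name="sentiment",
        primary_metric="accuracy",
    )
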
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py
new file mode 100644
index 00000000..9c45e5f1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py
@@ -0,0 +1,445 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Entrypoints for creating AutoML tasks."""
+from typing import List, Optional, Union
+
+from azure.ai.ml.entities._builders.base_node import pipeline_node_decorator
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job.automl.tabular import (
+ ClassificationJob,
+ ForecastingJob,
+ ForecastingSettings,
+ RegressionJob,
+)
+
+
+@pipeline_node_decorator
+def classification(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[str] = None,
+ enable_model_explainability: Optional[bool] = None,
+ weight_column_name: Optional[str] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ n_cross_validations: Optional[Union[str, int]] = None,
+ cv_split_column_names: Optional[List[str]] = None,
+ test_data: Optional[Input] = None,
+ test_data_size: Optional[float] = None,
+ **kwargs,
+) -> ClassificationJob:
+ """Function to create a ClassificationJob.
+
+    A classification job is used to train a model that best predicts the class of a data sample.
+ Various models are trained using the training data. The model with the best performance on the validation data
+ based on the primary metric is selected as the final model.
+
+ :keyword training_data: The training data to be used within the experiment.
+ It should contain both training features and a label column (optionally a sample weights column).
+ :paramtype training_data: Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data``, ``validation_data`` and ``test_data`` parameters
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+ Acceptable values: accuracy, AUC_weighted, norm_macro_recall, average_precision_score_weighted,
+        and precision_score_weighted.
+        Defaults to accuracy.
+    :paramtype primary_metric: str
+ :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all AutoML
+ training iterations.
+ The default is None. For more information, see
+ `Interpretability: model explanations in automated machine learning
+ <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+ :paramtype enable_model_explainability: bool
+ :keyword weight_column_name: The name of the sample weight column. Automated ML supports a weighted column
+ as an input, causing rows in the data to be weighted up or down.
+ If the input data is from a pandas.DataFrame which doesn't have column names,
+ column indices can be used instead, expressed as integers.
+
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters
+ :paramtype weight_column_name: str
+ :keyword validation_data: The validation data to be used within the experiment.
+ It should contain both training features and label column (optionally a sample weights column).
+
+ Defaults to None
+ :paramtype validation_data: Input
+ :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+ is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+ ``validation_data_size`` to extract validation data out of the specified training data.
+ For custom cross validation fold, use ``cv_split_column_names``.
+
+ For more information, see
+ `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+ /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+ Defaults to None
+ :paramtype validation_data_size: float
+ :keyword n_cross_validations: How many cross validations to perform when user validation data is not specified.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+ ``validation_data_size`` to extract validation data out of the specified training data.
+ For custom cross validation fold, use ``cv_split_column_names``.
+
+ For more information, see
+ `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+ /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+ Defaults to None
+ :paramtype n_cross_validations: Union[str, int]
+ :keyword cv_split_column_names: List of names of the columns that contain custom cross validation split.
+        Each of the CV split columns represents one CV split where each row is marked either
+ 1 for training or 0 for validation.
+
+ Defaults to None
+ :paramtype cv_split_column_names: List[str]
+    :keyword test_data: The Model Test feature using test datasets or test data splits is in
+        Preview state and might change at any time.
+ The test data to be used for a test run that will automatically be started after
+ model training is complete. The test run will get predictions using the best model
+ and will compute metrics given these predictions.
+
+        If this parameter or the ``test_data_size`` parameter is not specified then
+ no test run will be executed automatically after model training is completed.
+ Test data should contain both features and label column.
+ If ``test_data`` is specified then the ``target_column_name`` parameter must be specified.
+
+ Defaults to None
+ :paramtype test_data: Input
+    :keyword test_data_size: The Model Test feature using test datasets or test data splits is in
+        Preview state and might change at any time.
+ What fraction of the training data to hold out for test data for a test run that will
+ automatically be started after model training is complete. The test run will get
+ predictions using the best model and will compute metrics given these predictions.
+
+ This should be between 0.0 and 1.0 non-inclusive.
+ If ``test_data_size`` is specified at the same time as ``validation_data_size``,
+ then the test data is split from ``training_data`` before the validation data is split.
+ For example, if ``validation_data_size=0.1``, ``test_data_size=0.1`` and the original training data has
+ 1000 rows, then the test data will have 100 rows, the validation data will contain 90 rows and the
+ training data will have 810 rows.
+
+ For regression based tasks, random sampling is used. For classification tasks, stratified sampling
+ is used. Forecasting does not currently support specifying a test dataset using a train/test split.
+
+        If this parameter or the ``test_data`` parameter is not specified then
+ no test run will be executed automatically after model training is completed.
+
+ Defaults to None
+ :paramtype test_data_size: float
+ :return: A job object that can be submitted to an Azure ML compute for execution.
+ :rtype: ClassificationJob
+ """
+ classification_job = ClassificationJob(primary_metric=primary_metric, **kwargs)
+
+ classification_job.set_data(
+ training_data=training_data,
+ target_column_name=target_column_name,
+ weight_column_name=weight_column_name,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ n_cross_validations=n_cross_validations,
+ cv_split_column_names=cv_split_column_names,
+ test_data=test_data,
+ test_data_size=test_data_size,
+ )
+ classification_job.set_training(enable_model_explainability=enable_model_explainability)
+
+ return classification_job
+
+
+@pipeline_node_decorator
+def regression(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[str] = None,
+ enable_model_explainability: Optional[bool] = None,
+ weight_column_name: Optional[str] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ n_cross_validations: Optional[Union[str, int]] = None,
+ cv_split_column_names: Optional[List[str]] = None,
+ test_data: Optional[Input] = None,
+ test_data_size: Optional[float] = None,
+ **kwargs,
+) -> RegressionJob:
+ """Function to create a Regression Job.
+
+ A regression job is used to train a model to predict continuous values of a target variable from a dataset.
+ Various models are trained using the training data. The model with the best performance on the validation data
+ based on the primary metric is selected as the final model.
+
+
+ :keyword training_data: The training data to be used within the experiment.
+ It should contain both training features and a label column (optionally a sample weights column).
+ :paramtype training_data: Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data``, ``validation_data`` and ``test_data`` parameters
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+ Acceptable values: spearman_correlation, r2_score, normalized_mean_absolute_error,
+ normalized_root_mean_squared_error.
+        Defaults to normalized_root_mean_squared_error.
+    :paramtype primary_metric: str
+ :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all AutoML
+ training iterations.
+ The default is None. For more information, see
+ `Interpretability: model explanations in automated machine learning
+ <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+ :paramtype enable_model_explainability: bool
+ :keyword weight_column_name: The name of the sample weight column. Automated ML supports a weighted column
+ as an input, causing rows in the data to be weighted up or down.
+ If the input data is from a pandas.DataFrame which doesn't have column names,
+ column indices can be used instead, expressed as integers.
+
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters
+ :paramtype weight_column_name: str
+ :keyword validation_data: The validation data to be used within the experiment.
+ It should contain both training features and label column (optionally a sample weights column).
+
+ Defaults to None
+ :paramtype validation_data: Input
+ :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+ is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+ ``validation_data_size`` to extract validation data out of the specified training data.
+ For custom cross validation fold, use ``cv_split_column_names``.
+
+ For more information, see
+ `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+ /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+ Defaults to None
+ :paramtype validation_data_size: float
+ :keyword n_cross_validations: How many cross validations to perform when user validation data is not specified.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+ ``validation_data_size`` to extract validation data out of the specified training data.
+ For custom cross validation fold, use ``cv_split_column_names``.
+
+ For more information, see
+ `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+ /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+ Defaults to None
+ :paramtype n_cross_validations: Union[str, int]
+ :keyword cv_split_column_names: List of names of the columns that contain custom cross validation split.
+        Each of the CV split columns represents one CV split where each row is marked either
+ 1 for training or 0 for validation.
+
+ Defaults to None
+ :paramtype cv_split_column_names: List[str]
+    :keyword test_data: The Model Test feature using test datasets or test data splits is in
+        Preview state and might change at any time.
+ The test data to be used for a test run that will automatically be started after
+ model training is complete. The test run will get predictions using the best model
+ and will compute metrics given these predictions.
+
+        If this parameter or the ``test_data_size`` parameter is not specified then
+ no test run will be executed automatically after model training is completed.
+ Test data should contain both features and label column.
+ If ``test_data`` is specified then the ``target_column_name`` parameter must be specified.
+
+ Defaults to None
+ :paramtype test_data: Input
+    :keyword test_data_size: The Model Test feature using test datasets or test data splits is in
+        Preview state and might change at any time.
+ What fraction of the training data to hold out for test data for a test run that will
+ automatically be started after model training is complete. The test run will get
+ predictions using the best model and will compute metrics given these predictions.
+
+ This should be between 0.0 and 1.0 non-inclusive.
+ If ``test_data_size`` is specified at the same time as ``validation_data_size``,
+ then the test data is split from ``training_data`` before the validation data is split.
+ For example, if ``validation_data_size=0.1``, ``test_data_size=0.1`` and the original training data has
+ 1000 rows, then the test data will have 100 rows, the validation data will contain 90 rows
+ and the training data will have 810 rows.
+
+ For regression based tasks, random sampling is used. For classification
+ tasks, stratified sampling is used. Forecasting does not currently
+ support specifying a test dataset using a train/test split.
+
+        If this parameter or the ``test_data`` parameter is not specified then
+ no test run will be executed automatically after model training is completed.
+
+ Defaults to None
+ :paramtype test_data_size: float
+ :return: A job object that can be submitted to an Azure ML compute for execution.
+ :rtype: RegressionJob
+ """
+ regression_job = RegressionJob(primary_metric=primary_metric, **kwargs)
+ regression_job.set_data(
+ training_data=training_data,
+ target_column_name=target_column_name,
+ weight_column_name=weight_column_name,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ n_cross_validations=n_cross_validations,
+ cv_split_column_names=cv_split_column_names,
+ test_data=test_data,
+ test_data_size=test_data_size,
+ )
+ regression_job.set_training(enable_model_explainability=enable_model_explainability)
+
+ return regression_job
+
+
+@pipeline_node_decorator
+def forecasting(
+ *,
+ training_data: Input,
+ target_column_name: str,
+ primary_metric: Optional[str] = None,
+ enable_model_explainability: Optional[bool] = None,
+ weight_column_name: Optional[str] = None,
+ validation_data: Optional[Input] = None,
+ validation_data_size: Optional[float] = None,
+ n_cross_validations: Optional[Union[str, int]] = None,
+ cv_split_column_names: Optional[List[str]] = None,
+ test_data: Optional[Input] = None,
+ test_data_size: Optional[float] = None,
+ forecasting_settings: Optional[ForecastingSettings] = None,
+ **kwargs,
+) -> ForecastingJob:
+ """Function to create a Forecasting job.
+
+ A forecasting task is used to predict target values for a future time period based on the historical data.
+ Various models are trained using the training data. The model with the best performance on the validation data
+ based on the primary metric is selected as the final model.
+
+ :keyword training_data: The training data to be used within the experiment.
+ It should contain both training features and a label column (optionally a sample weights column).
+ :paramtype training_data: Input
+ :keyword target_column_name: The name of the label column.
+ This parameter is applicable to ``training_data``, ``validation_data`` and ``test_data`` parameters
+ :paramtype target_column_name: str
+ :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+ Automated Machine Learning collects more metrics than it can optimize.
+ For more information on how metrics are calculated, see
+ https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: r2_score, normalized_mean_absolute_error, normalized_root_mean_squared_error.
+        Defaults to normalized_root_mean_squared_error.
+    :paramtype primary_metric: str
+ :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all AutoML
+ training iterations.
+ The default is None. For more information, see
+ `Interpretability: model explanations in automated machine learning
+ <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+ :paramtype enable_model_explainability: bool
+ :keyword weight_column_name: The name of the sample weight column. Automated ML supports a weighted column
+ as an input, causing rows in the data to be weighted up or down.
+ If the input data is from a pandas.DataFrame which doesn't have column names,
+ column indices can be used instead, expressed as integers.
+
+ This parameter is applicable to ``training_data`` and ``validation_data`` parameters
+ :paramtype weight_column_name: str
+ :keyword validation_data: The validation data to be used within the experiment.
+ It should contain both training features and label column (optionally a sample weights column).
+
+ Defaults to None
+ :paramtype validation_data: Input
+ :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+ is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+ ``validation_data_size`` to extract validation data out of the specified training data.
+ For custom cross validation fold, use ``cv_split_column_names``.
+
+ For more information, see
+ `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+ /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+ Defaults to None
+ :paramtype validation_data_size: float
+ :keyword n_cross_validations: How many cross validations to perform when user validation data is not specified.
+
+ Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+ ``validation_data_size`` to extract validation data out of the specified training data.
+ For custom cross validation fold, use ``cv_split_column_names``.
+
+ For more information, see
+ `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+ /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+ Defaults to None
+ :paramtype n_cross_validations: Union[str, int]
+ :keyword cv_split_column_names: List of names of the columns that contain custom cross validation split.
+        Each of the CV split columns represents one CV split where each row is marked either
+ 1 for training or 0 for validation.
+
+ Defaults to None
+ :paramtype cv_split_column_names: List[str]
+    :keyword test_data: The Model Test feature using test datasets or test data splits is in
+        Preview state and might change at any time.
+ The test data to be used for a test run that will automatically be started after
+ model training is complete. The test run will get predictions using the best model
+ and will compute metrics given these predictions.
+
+        If this parameter or the ``test_data_size`` parameter is not specified then
+ no test run will be executed automatically after model training is completed.
+ Test data should contain both features and label column.
+ If ``test_data`` is specified then the ``target_column_name`` parameter must be specified.
+
+ Defaults to None
+ :paramtype test_data: Input
+    :keyword test_data_size: The Model Test feature using test datasets or test data splits is in
+        Preview state and might change at any time.
+ What fraction of the training data to hold out for test data for a test run that will
+ automatically be started after model training is complete. The test run will get
+ predictions using the best model and will compute metrics given these predictions.
+
+ This should be between 0.0 and 1.0 non-inclusive.
+ If ``test_data_size`` is specified at the same time as ``validation_data_size``,
+ then the test data is split from ``training_data`` before the validation data is split.
+ For example, if ``validation_data_size=0.1``, ``test_data_size=0.1`` and the original training data
+ has 1000 rows, then the test data will have 100 rows, the validation data will contain 90 rows
+ and the training data will have 810 rows.
+
+ For regression based tasks, random sampling is used. For classification
+ tasks, stratified sampling is used. Forecasting does not currently
+ support specifying a test dataset using a train/test split.
+
+        If this parameter or the ``test_data`` parameter is not specified then
+ no test run will be executed automatically after model training is completed.
+
+ Defaults to None
+ :paramtype test_data_size: float
+ :keyword forecasting_settings: The settings for the forecasting task
+ :paramtype forecasting_settings: ForecastingSettings
+ :return: A job object that can be submitted to an Azure ML compute for execution.
+ :rtype: ForecastingJob
+ """
+ forecast_job = ForecastingJob(
+ primary_metric=primary_metric,
+ forecasting_settings=forecasting_settings,
+ **kwargs,
+ )
+ forecast_job.set_data(
+ training_data=training_data,
+ target_column_name=target_column_name,
+ weight_column_name=weight_column_name,
+ validation_data=validation_data,
+ validation_data_size=validation_data_size,
+ n_cross_validations=n_cross_validations,
+ cv_split_column_names=cv_split_column_names,
+ test_data=test_data,
+ test_data_size=test_data_size,
+ )
+ forecast_job.set_training(enable_model_explainability=enable_model_explainability)
+
+ return forecast_job
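
The three tabular factories above follow the same pattern: construct the job with the primary metric (plus forecasting_settings for forecasting), call set_data with the training/validation/test inputs, then set_training with the explainability flag. A hedged end-to-end sketch for classification (the workspace identifiers, data path, column name, and the set_limits call are illustrative assumptions, not part of this diff):

    from azure.ai.ml import Input, MLClient, automl
    from azure.ai.ml.constants import AssetTypes
    from azure.identity import DefaultAzureCredential

    job = automl.classification(
        training_data=Input(type=AssetTypes.MLTABLE, path="./data/train-mltable-folder"),
        target_column_name="label",
        primary_metric="accuracy",
        n_cross_validations=5,
        enable_model_explainability=True,
    )
    # Keep the run bounded; set_limits is assumed to be available on ClassificationJob.
    job.set_limits(timeout_minutes=60, max_trials=20)

    # Submit the job to a workspace; identifiers below are placeholders.
    ml_client = MLClient(
        credential=DefaultAzureCredential(),
        subscription_id="<subscription-id>",
        resource_group_name="<resource-group>",
        workspace_name="<workspace-name>",
    )
    submitted = ml_client.jobs.create_or_update(job)
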