author:    S. Solomon Darnell  2025-03-28 21:52:21 -0500
committer: S. Solomon Darnell  2025-03-28 21:52:21 -0500
commit:    4a52a71956a8d46fcb7294ac71734504bb09bcc2 (patch)
tree:      ee3dc5af3b6313e921cd920906356f5d4febc4ed  /.venv/lib/python3.12/site-packages/azure/ai/ml/automl
parent:    cc961e04ba734dd72309fb548a2f97d67d578813 (diff)
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/automl')
4 files changed, 1062 insertions, 0 deletions
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py
new file mode 100644
index 00000000..d34e30a8
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/__init__.py
@@ -0,0 +1,144 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Contains automated machine learning classes for Azure Machine Learning SDKv2.
+
+Main areas include managing AutoML tasks.
+"""
+from azure.ai.ml.entities._job.automl import TrainingSettings
+from azure.ai.ml.entities._job.automl.image import (
+    LogTrainingMetrics,
+    LogValidationLoss,
+    ImageClassificationJob,
+    ImageClassificationMultilabelJob,
+    ImageClassificationSearchSpace,
+    ImageInstanceSegmentationJob,
+    ImageLimitSettings,
+    ImageModelSettingsClassification,
+    ImageModelSettingsObjectDetection,
+    ImageObjectDetectionJob,
+    ImageObjectDetectionSearchSpace,
+    ImageSweepSettings,
+)
+from azure.ai.ml.entities._job.automl.nlp import (
+    NlpFeaturizationSettings,
+    NlpFixedParameters,
+    NlpLimitSettings,
+    NlpSearchSpace,
+    NlpSweepSettings,
+    TextClassificationJob,
+    TextClassificationMultilabelJob,
+    TextNerJob,
+)
+from azure.ai.ml.entities._job.automl.search_space import SearchSpace
+from azure.ai.ml.entities._job.automl.stack_ensemble_settings import (
+    StackEnsembleSettings,
+)
+from azure.ai.ml.entities._job.automl.tabular import (
+    ClassificationJob,
+    ColumnTransformer,
+    ForecastingJob,
+    ForecastingSettings,
+    RegressionJob,
+    TabularFeaturizationSettings,
+    TabularLimitSettings,
+)
+
+from .._restclient.v2023_04_01_preview.models import (
+    BlockedTransformers,
+    ClassificationModels,
+    ClassificationMultilabelPrimaryMetrics,
+    ClassificationPrimaryMetrics,
+    FeaturizationMode,
+    ForecastHorizonMode,
+    ForecastingModels,
+    ForecastingPrimaryMetrics,
+    InstanceSegmentationPrimaryMetrics,
+    LearningRateScheduler,
+    NCrossValidationsMode,
+    ObjectDetectionPrimaryMetrics,
+    RegressionModels,
+    RegressionPrimaryMetrics,
+    SamplingAlgorithmType,
+    ShortSeriesHandlingConfiguration,
+    StochasticOptimizer,
+    TargetAggregationFunction,
+    TargetLagsMode,
+    TargetRollingWindowSizeMode,
+    UseStl,
+    ValidationMetricType,
+)
+from ._automl_image import (
+    image_classification,
+    image_classification_multilabel,
+    image_instance_segmentation,
+    image_object_detection,
+)
+from ._automl_nlp import text_classification, text_classification_multilabel, text_ner
+from ._automl_tabular import classification, forecasting, regression
+
+__all__ = [
+    "ClassificationModels",
+    "RegressionModels",
+    "ForecastingModels",
+    "FeaturizationMode",
+    "NCrossValidationsMode",
+    "ForecastHorizonMode",
+    "ShortSeriesHandlingConfiguration",
+    "TargetLagsMode",
+    "TargetRollingWindowSizeMode",
+    "TargetAggregationFunction",
+    "UseStl",
+    "ClassificationPrimaryMetrics",
+    "RegressionPrimaryMetrics",
+    "ForecastingPrimaryMetrics",
+    "ClassificationMultilabelPrimaryMetrics",
+    "ObjectDetectionPrimaryMetrics",
+    "InstanceSegmentationPrimaryMetrics",
+    "ColumnTransformer",
+    "TabularFeaturizationSettings",
+    "ForecastingSettings",
+    "TabularLimitSettings",
+    "NlpFeaturizationSettings",
+    "NlpFixedParameters",
+    "NlpLimitSettings",
+    "NlpSweepSettings",
+    "NlpSearchSpace",
+    "LogTrainingMetrics",
+    "LogValidationLoss",
+    "ImageLimitSettings",
+    "ImageModelSettingsClassification",
+    "ImageModelSettingsObjectDetection",
+    "ImageSweepSettings",
+    "ImageObjectDetectionSearchSpace",
+    "ImageClassificationSearchSpace",
+    "TrainingSettings",
+    "image_classification",
+    "image_classification_multilabel",
+    "image_object_detection",
+    "image_instance_segmentation",
+    "text_classification",
+    "text_classification_multilabel",
+    "text_ner",
+    "classification",
+    "regression",
+    "forecasting",
+    "SearchSpace",
+    "StackEnsembleSettings",
+    "BlockedTransformers",
+    "ClassificationJob",
+    "ForecastingJob",
+    "RegressionJob",
+    "ImageClassificationJob",
+    "ImageClassificationMultilabelJob",
+    "ImageObjectDetectionJob",
+    "ImageInstanceSegmentationJob",
+    "LearningRateScheduler",
+    "SamplingAlgorithmType",
+    "StochasticOptimizer",
+    "TextClassificationJob",
+    "TextClassificationMultilabelJob",
+    "TextNerJob",
+    "ValidationMetricType",
+]
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py
new file mode 100644
index 00000000..521c9aa2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_image.py
@@ -0,0 +1,298 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Entrypoints for creating AutoML tasks."""
+
+from typing import Optional, TypeVar, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+    ClassificationMultilabelPrimaryMetrics,
+    ClassificationPrimaryMetrics,
+    InstanceSegmentationPrimaryMetrics,
+    ObjectDetectionPrimaryMetrics,
+)
+from azure.ai.ml.entities._builders.base_node import pipeline_node_decorator
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job.automl.image.automl_image_object_detection_base import AutoMLImageObjectDetectionBase
+from azure.ai.ml.entities._job.automl.image.image_classification_job import ImageClassificationJob
+from azure.ai.ml.entities._job.automl.image.image_classification_multilabel_job import ImageClassificationMultilabelJob
+from azure.ai.ml.entities._job.automl.image.image_instance_segmentation_job import ImageInstanceSegmentationJob
+from azure.ai.ml.entities._job.automl.image.image_object_detection_job import ImageObjectDetectionJob
+
+TImageJob = TypeVar("TImageJob", bound=AutoMLImageObjectDetectionBase)
+
+
+def _create_image_job(
+    job_cls: TImageJob,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[Union[str, ClassificationPrimaryMetrics]] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    **kwargs,
+) -> TImageJob:
+    """Helper function to create objects for AutoML Image jobs.
+
+    :param job_cls: The job class
+    :type job_cls: TImageJob
+    :param training_data: The training input data
+    :type training_data: ~azure.ai.ml.entities.Input
+    :param target_column_name: The target column name
+    :type target_column_name: str
+    :param primary_metric: The primary metric
+    :type primary_metric: Optional[Union[str, ~azure.ai.ml.automl.ClassificationPrimaryMetrics]]
+    :param validation_data: The validation data
+    :type validation_data: Optional[~azure.ai.ml.entities.Input]
+    :param validation_data_size: The validation data size
+    :type validation_data_size: Optional[float]
+    :return: An AutoML Image Job
+    :rtype: TImageJob
+    """
+    image_job = job_cls(primary_metric=primary_metric, **kwargs)  # type: ignore[operator]
+    image_job.set_data(
+        training_data=training_data,
+        target_column_name=target_column_name,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+    )
+
+    return image_job
+
+
+@pipeline_node_decorator
+def image_classification(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[Union[str, ClassificationPrimaryMetrics]] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    **kwargs,
+) -> ImageClassificationJob:
+    """Creates an object for AutoML Image multi-class Classification job.
+
+    :keyword training_data: The training data to be used within the experiment.
+    :paramtype training_data: ~azure.ai.ml.entities.Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: accuracy, AUC_weighted, norm_macro_recall, average_precision_score_weighted,
+        and precision_score_weighted
+        Defaults to accuracy.
+    :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.ClassificationPrimaryMetrics]
+    :keyword validation_data: The validation data to be used within the experiment.
+    :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+        is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+        to extract validation data out of the specified training data.
+
+        Defaults to .2
+    :paramtype validation_data_size: float
+
+    :return: Image classification job object that can be submitted to an Azure ML compute for execution.
+    :rtype: ~azure.ai.ml.automl.ImageClassificationJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_image.py
+            :start-after: [START automl.image_classification]
+            :end-before: [END automl.image_classification]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl image classification job
+    """
+    return _create_image_job(  # type: ignore[type-var, return-value]
+        job_cls=ImageClassificationJob,
+        training_data=training_data,
+        target_column_name=target_column_name,
+        primary_metric=primary_metric,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        **kwargs,
+    )
+
+
+@pipeline_node_decorator
+def image_classification_multilabel(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[Union[str, ClassificationMultilabelPrimaryMetrics]] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    **kwargs,
+) -> ImageClassificationMultilabelJob:
+    """Creates an object for AutoML Image multi-label Classification job.
+
+    :keyword training_data: The training data to be used within the experiment.
+    :paramtype training_data: ~azure.ai.ml.entities.Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: accuracy, AUC_weighted, norm_macro_recall, average_precision_score_weighted,
+        precision_score_weighted, and Iou
+        Defaults to Iou.
+    :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.ClassificationMultilabelPrimaryMetrics]
+    :keyword validation_data: The validation data to be used within the experiment.
+    :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: The fraction of the training data to hold out for validation when user does not
+        provide the validation data. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+        to extract validation data out of the specified training data.
+
+        Defaults to .2
+    :paramtype validation_data_size: float
+
+    :return: Image multi-label classification job object that can be submitted to an Azure ML compute for execution.
+    :rtype: ~azure.ai.ml.automl.ImageClassificationMultilabelJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_image.py
+            :start-after: [START automl.image_classification_multilabel]
+            :end-before: [END automl.image_classification_multilabel]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl image multilabel classification job
+    """
+    return _create_image_job(  # type: ignore[type-var, return-value]
+        job_cls=ImageClassificationMultilabelJob,
+        training_data=training_data,
+        target_column_name=target_column_name,
+        primary_metric=primary_metric,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        **kwargs,
+    )
+
+
+@pipeline_node_decorator
+def image_object_detection(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[Union[str, ObjectDetectionPrimaryMetrics]] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    **kwargs,
+) -> ImageObjectDetectionJob:
+    """Creates an object for AutoML Image Object Detection job.
+
+    :keyword training_data: The training data to be used within the experiment.
+    :paramtype training_data: ~azure.ai.ml.entities.Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: MeanAveragePrecision
+        Defaults to MeanAveragePrecision.
+    :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.ObjectDetectionPrimaryMetrics]
+    :keyword validation_data: The validation data to be used within the experiment.
+    :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: The fraction of the training data to hold out for validation when user does not
+        provide the validation data. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+        to extract validation data out of the specified training data.
+
+        Defaults to .2
+    :paramtype validation_data_size: float
+
+    :return: Image object detection job object that can be submitted to an Azure ML compute for execution.
+    :rtype: ~azure.ai.ml.automl.ImageObjectDetectionJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_image.py
+            :start-after: [START automl.image_object_detection]
+            :end-before: [END automl.image_object_detection]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl image object detection job
+    """
+    return _create_image_job(  # type: ignore[type-var, return-value]
+        job_cls=ImageObjectDetectionJob,
+        training_data=training_data,
+        target_column_name=target_column_name,
+        primary_metric=primary_metric,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        **kwargs,
+    )
+
+
+@pipeline_node_decorator
+def image_instance_segmentation(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[Union[str, InstanceSegmentationPrimaryMetrics]] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    **kwargs,
+) -> ImageInstanceSegmentationJob:
+    """Creates an object for AutoML Image Instance Segmentation job.
+
+    :keyword training_data: The training data to be used within the experiment.
+    :paramtype training_data: ~azure.ai.ml.entities.Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters.
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: MeanAveragePrecision
+        Defaults to MeanAveragePrecision.
+    :paramtype primary_metric: Union[str, ~azure.ai.ml.automl.InstanceSegmentationPrimaryMetrics]
+    :keyword validation_data: The validation data to be used within the experiment.
+    :paramtype validation_data: Optional[~azure.ai.ml.entities.Input]
+    :keyword validation_data_size: The fraction of the training data to hold out for validation when user does not
+        provide the validation data. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``validation_data_size``
+        to extract validation data out of the specified training data.
+
+        Defaults to .2
+    :paramtype validation_data_size: float
+
+    :return: Image instance segmentation job
+    :rtype: ~azure.ai.ml.automl.ImageInstanceSegmentationJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_image.py
+            :start-after: [START automl.image_instance_segmentation]
+            :end-before: [END automl.image_instance_segmentation]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl image instance segmentation job
+    """
+    return _create_image_job(  # type: ignore[type-var, return-value]
+        job_cls=ImageInstanceSegmentationJob,
+        training_data=training_data,
+        target_column_name=target_column_name,
+        primary_metric=primary_metric,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        **kwargs,
+    )
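
The image entrypoints above all delegate to _create_image_job and differ only in the job class and primary metric. A minimal, hedged usage sketch (not part of this commit; the MLTable paths, label column and compute target are placeholder assumptions):

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    # Placeholder MLTable folders and column name.
    image_job = automl.image_object_detection(
        training_data=Input(type=AssetTypes.MLTABLE, path="./data/object-detection/train"),
        validation_data=Input(type=AssetTypes.MLTABLE, path="./data/object-detection/val"),
        target_column_name="label",
        primary_metric="MeanAveragePrecision",  # the only value listed in the docstring above
        compute="gpu-cluster",                  # assumed AmlCompute target, passed through **kwargs
    )
    # Optional run limits (backed by ImageLimitSettings); values here are illustrative.
    image_job.set_limits(timeout_minutes=60)
    # Submit with ml_client.jobs.create_or_update(image_job) as shown earlier.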
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py
new file mode 100644
index 00000000..ac7ebcf6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_nlp.py
@@ -0,0 +1,175 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Entrypoints for creating AutoML tasks."""
+
+from typing import Optional
+
+from azure.ai.ml.entities._builders.base_node import pipeline_node_decorator
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job.automl.nlp.text_classification_job import TextClassificationJob
+from azure.ai.ml.entities._job.automl.nlp.text_classification_multilabel_job import TextClassificationMultilabelJob
+from azure.ai.ml.entities._job.automl.nlp.text_ner_job import TextNerJob
+
+
+@pipeline_node_decorator
+def text_classification(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    validation_data: Input,
+    primary_metric: Optional[str] = None,
+    log_verbosity: Optional[str] = None,
+    **kwargs,
+) -> TextClassificationJob:
+    """Function to create a TextClassificationJob.
+
+    A text classification job is used to train a model that can predict the class/category of text data.
+    Input training data should include a target column that classifies the text into exactly one class.
+
+    :keyword training_data: The training data to be used within the experiment.
+        It should contain both training features and a target column.
+    :paramtype training_data: Input
+    :keyword target_column_name: Name of the target column.
+    :paramtype target_column_name: str
+    :keyword validation_data: The validation data to be used within the experiment.
+        It should contain both training features and a target column.
+    :paramtype validation_data: Input
+    :keyword primary_metric: Primary metric for the task.
+        Acceptable values: accuracy, AUC_weighted, precision_score_weighted
+    :paramtype primary_metric: Union[str, ClassificationPrimaryMetrics]
+    :keyword log_verbosity: Log verbosity level.
+    :paramtype log_verbosity: str
+
+    :return: The TextClassificationJob object.
+    :rtype: TextClassificationJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+            :start-after: [START automl.text_classification]
+            :end-before: [END automl.text_classification]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl text classification job
+    """
+
+    text_classification_job = TextClassificationJob(
+        primary_metric=primary_metric,
+        training_data=training_data,
+        target_column_name=target_column_name,
+        validation_data=validation_data,
+        log_verbosity=log_verbosity,
+        **kwargs,
+    )
+
+    return text_classification_job
+
+
+@pipeline_node_decorator
+def text_classification_multilabel(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    validation_data: Input,
+    primary_metric: Optional[str] = None,
+    log_verbosity: Optional[str] = None,
+    **kwargs,
+) -> TextClassificationMultilabelJob:
+    """Function to create a TextClassificationMultilabelJob.
+
+    A text classification multilabel job is used to train a model that can predict the classes/categories
+    of text data. Input training data should include a target column that classifies the text into class(es).
+    For more information on format of multilabel data, refer to:
+    https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-nlp-models#multi-label
+
+    :keyword training_data: The training data to be used within the experiment.
+        It should contain both training features and a target column.
+    :paramtype training_data: Input
+    :keyword target_column_name: Name of the target column.
+    :paramtype target_column_name: str
+    :keyword validation_data: The validation data to be used within the experiment.
+        It should contain both training features and a target column.
+    :paramtype validation_data: Input
+    :keyword primary_metric: Primary metric for the task.
+        Acceptable values: accuracy
+    :paramtype primary_metric: str
+    :keyword log_verbosity: Log verbosity level.
+    :paramtype log_verbosity: str
+
+    :return: The TextClassificationMultilabelJob object.
+    :rtype: TextClassificationMultilabelJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+            :start-after: [START automl.text_classification_multilabel]
+            :end-before: [END automl.text_classification_multilabel]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl text multilabel classification job
+    """
+
+    text_classification_multilabel_job = TextClassificationMultilabelJob(
+        primary_metric=primary_metric,
+        training_data=training_data,
+        target_column_name=target_column_name,
+        validation_data=validation_data,
+        log_verbosity=log_verbosity,
+        **kwargs,
+    )
+
+    return text_classification_multilabel_job
+
+
+@pipeline_node_decorator
+def text_ner(
+    *,
+    training_data: Input,
+    validation_data: Input,
+    primary_metric: Optional[str] = None,
+    log_verbosity: Optional[str] = None,
+    **kwargs,
+) -> TextNerJob:
+    """Function to create a TextNerJob.
+
+    A text named entity recognition job is used to train a model that can predict the named entities in the text.
+    Input training data should be a text file in CoNLL format. For more information on format of text NER data,
+    refer to:
+    https://learn.microsoft.com/azure/machine-learning/how-to-auto-train-nlp-models#named-entity-recognition-ner
+
+    :keyword training_data: The training data to be used within the experiment.
+        It should contain both training features and a target column.
+    :paramtype training_data: Input
+    :keyword validation_data: The validation data to be used within the experiment.
+        It should contain both training features and a target column.
+    :paramtype validation_data: Input
+    :keyword primary_metric: Primary metric for the task.
+        Acceptable values: accuracy
+    :paramtype primary_metric: str
+    :keyword log_verbosity: Log verbosity level.
+    :paramtype log_verbosity: str
+
+    :return: The TextNerJob object.
+    :rtype: TextNerJob
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+            :start-after: [START automl.text_ner]
+            :end-before: [END automl.text_ner]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl text ner job
+    """
+
+    text_ner_job = TextNerJob(
+        primary_metric=primary_metric,
+        training_data=training_data,
+        validation_data=validation_data,
+        log_verbosity=log_verbosity,
+        **kwargs,
+    )
+
+    return text_ner_job
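
The NLP entrypoints above construct the corresponding job objects directly from their keyword arguments. A small illustrative sketch under the same caveats (the MLTable paths and target column are placeholders, not from this commit):

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    # Placeholder MLTable folders and column name.
    text_job = automl.text_classification(
        training_data=Input(type=AssetTypes.MLTABLE, path="./data/sentiment/train"),
        validation_data=Input(type=AssetTypes.MLTABLE, path="./data/sentiment/valid"),
        target_column_name="Sentiment",
        primary_metric="accuracy",
    )
    # Submit with ml_client.jobs.create_or_update(text_job) as shown earlier.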
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py
new file mode 100644
index 00000000..9c45e5f1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/automl/_automl_tabular.py
@@ -0,0 +1,445 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+"""Entrypoints for creating AutoML tasks."""
+from typing import List, Optional, Union
+
+from azure.ai.ml.entities._builders.base_node import pipeline_node_decorator
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job.automl.tabular import (
+    ClassificationJob,
+    ForecastingJob,
+    ForecastingSettings,
+    RegressionJob,
+)
+
+
+@pipeline_node_decorator
+def classification(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[str] = None,
+    enable_model_explainability: Optional[bool] = None,
+    weight_column_name: Optional[str] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    n_cross_validations: Optional[Union[str, int]] = None,
+    cv_split_column_names: Optional[List[str]] = None,
+    test_data: Optional[Input] = None,
+    test_data_size: Optional[float] = None,
+    **kwargs,
+) -> ClassificationJob:
+    """Function to create a ClassificationJob.
+
+    A classification job is used to train a model that best predicts the class of a data sample.
+    Various models are trained using the training data. The model with the best performance on the validation data
+    based on the primary metric is selected as the final model.
+
+    :keyword training_data: The training data to be used within the experiment.
+        It should contain both training features and a label column (optionally a sample weights column).
+    :paramtype training_data: Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data``, ``validation_data`` and ``test_data`` parameters
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: accuracy, AUC_weighted, norm_macro_recall, average_precision_score_weighted,
+        and precision_score_weighted
+        Defaults to accuracy
+    :type primary_metric: str
+    :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all AutoML
+        training iterations.
+        The default is None. For more information, see
+        `Interpretability: model explanations in automated machine learning
+        <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+    :paramtype enable_model_explainability: bool
+    :keyword weight_column_name: The name of the sample weight column. Automated ML supports a weighted column
+        as an input, causing rows in the data to be weighted up or down.
+        If the input data is from a pandas.DataFrame which doesn't have column names,
+        column indices can be used instead, expressed as integers.
+
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters
+    :paramtype weight_column_name: str
+    :keyword validation_data: The validation data to be used within the experiment.
+        It should contain both training features and label column (optionally a sample weights column).
+
+        Defaults to None
+    :paramtype validation_data: Input
+    :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+        is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+        ``validation_data_size`` to extract validation data out of the specified training data.
+        For custom cross validation fold, use ``cv_split_column_names``.
+
+        For more information, see
+        `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+        /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+        Defaults to None
+    :paramtype validation_data_size: float
+    :keyword n_cross_validations: How many cross validations to perform when user validation data is not specified.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+        ``validation_data_size`` to extract validation data out of the specified training data.
+        For custom cross validation fold, use ``cv_split_column_names``.
+
+        For more information, see
+        `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+        /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+        Defaults to None
+    :paramtype n_cross_validations: Union[str, int]
+    :keyword cv_split_column_names: List of names of the columns that contain custom cross validation split.
+        Each of the CV split columns represents one CV split where each row is either marked
+        1 for training or 0 for validation.
+
+        Defaults to None
+    :paramtype cv_split_column_names: List[str]
+    :keyword test_data: The Model Test feature using test datasets or test data splits is a feature in
+        Preview state and might change at any time.
+        The test data to be used for a test run that will automatically be started after
+        model training is complete. The test run will get predictions using the best model
+        and will compute metrics given these predictions.
+
+        If this parameter or the ``test_data_size`` parameter are not specified then
+        no test run will be executed automatically after model training is completed.
+        Test data should contain both features and label column.
+        If ``test_data`` is specified then the ``target_column_name`` parameter must be specified.
+
+        Defaults to None
+    :paramtype test_data: Input
+    :keyword test_data_size: The Model Test feature using test datasets or test data splits is a feature in
+        Preview state and might change at any time.
+        What fraction of the training data to hold out for test data for a test run that will
+        automatically be started after model training is complete. The test run will get
+        predictions using the best model and will compute metrics given these predictions.
+
+        This should be between 0.0 and 1.0 non-inclusive.
+        If ``test_data_size`` is specified at the same time as ``validation_data_size``,
+        then the test data is split from ``training_data`` before the validation data is split.
+        For example, if ``validation_data_size=0.1``, ``test_data_size=0.1`` and the original training data has
+        1000 rows, then the test data will have 100 rows, the validation data will contain 90 rows and the
+        training data will have 810 rows.
+
+        For regression based tasks, random sampling is used. For classification tasks, stratified sampling
+        is used. Forecasting does not currently support specifying a test dataset using a train/test split.
+
+        If this parameter or the ``test_data`` parameter are not specified then
+        no test run will be executed automatically after model training is completed.
+
+        Defaults to None
+    :paramtype test_data_size: float
+    :return: A job object that can be submitted to an Azure ML compute for execution.
+    :rtype: ClassificationJob
+    """
+    classification_job = ClassificationJob(primary_metric=primary_metric, **kwargs)
+
+    classification_job.set_data(
+        training_data=training_data,
+        target_column_name=target_column_name,
+        weight_column_name=weight_column_name,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        n_cross_validations=n_cross_validations,
+        cv_split_column_names=cv_split_column_names,
+        test_data=test_data,
+        test_data_size=test_data_size,
+    )
+    classification_job.set_training(enable_model_explainability=enable_model_explainability)
+
+    return classification_job
+
+
+@pipeline_node_decorator
+def regression(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[str] = None,
+    enable_model_explainability: Optional[bool] = None,
+    weight_column_name: Optional[str] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    n_cross_validations: Optional[Union[str, int]] = None,
+    cv_split_column_names: Optional[List[str]] = None,
+    test_data: Optional[Input] = None,
+    test_data_size: Optional[float] = None,
+    **kwargs,
+) -> RegressionJob:
+    """Function to create a Regression Job.
+
+    A regression job is used to train a model to predict continuous values of a target variable from a dataset.
+    Various models are trained using the training data. The model with the best performance on the validation data
+    based on the primary metric is selected as the final model.
+
+
+    :keyword training_data: The training data to be used within the experiment.
+        It should contain both training features and a label column (optionally a sample weights column).
+    :paramtype training_data: Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data``, ``validation_data`` and ``test_data`` parameters
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: spearman_correlation, r2_score, normalized_mean_absolute_error,
+        normalized_root_mean_squared_error.
+        Defaults to normalized_root_mean_squared_error
+    :type primary_metric: str
+    :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all AutoML
+        training iterations.
+        The default is None. For more information, see
+        `Interpretability: model explanations in automated machine learning
+        <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+    :paramtype enable_model_explainability: bool
+    :keyword weight_column_name: The name of the sample weight column. Automated ML supports a weighted column
+        as an input, causing rows in the data to be weighted up or down.
+        If the input data is from a pandas.DataFrame which doesn't have column names,
+        column indices can be used instead, expressed as integers.
+
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters
+    :paramtype weight_column_name: str
+    :keyword validation_data: The validation data to be used within the experiment.
+        It should contain both training features and label column (optionally a sample weights column).
+
+        Defaults to None
+    :paramtype validation_data: Input
+    :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+        is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+        ``validation_data_size`` to extract validation data out of the specified training data.
+        For custom cross validation fold, use ``cv_split_column_names``.
+
+        For more information, see
+        `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+        /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+        Defaults to None
+    :paramtype validation_data_size: float
+    :keyword n_cross_validations: How many cross validations to perform when user validation data is not specified.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+        ``validation_data_size`` to extract validation data out of the specified training data.
+        For custom cross validation fold, use ``cv_split_column_names``.
+
+        For more information, see
+        `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+        /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+        Defaults to None
+    :paramtype n_cross_validations: Union[str, int]
+    :keyword cv_split_column_names: List of names of the columns that contain custom cross validation split.
+        Each of the CV split columns represents one CV split where each row is either marked
+        1 for training or 0 for validation.
+
+        Defaults to None
+    :paramtype cv_split_column_names: List[str]
+    :keyword test_data: The Model Test feature using test datasets or test data splits is a feature in
+        Preview state and might change at any time.
+        The test data to be used for a test run that will automatically be started after
+        model training is complete. The test run will get predictions using the best model
+        and will compute metrics given these predictions.
+
+        If this parameter or the ``test_data_size`` parameter are not specified then
+        no test run will be executed automatically after model training is completed.
+        Test data should contain both features and label column.
+        If ``test_data`` is specified then the ``target_column_name`` parameter must be specified.
+
+        Defaults to None
+    :paramtype test_data: Input
+    :keyword test_data_size: The Model Test feature using test datasets or test data splits is a feature in
+        Preview state and might change at any time.
+        What fraction of the training data to hold out for test data for a test run that will
+        automatically be started after model training is complete. The test run will get
+        predictions using the best model and will compute metrics given these predictions.
+
+        This should be between 0.0 and 1.0 non-inclusive.
+        If ``test_data_size`` is specified at the same time as ``validation_data_size``,
+        then the test data is split from ``training_data`` before the validation data is split.
+        For example, if ``validation_data_size=0.1``, ``test_data_size=0.1`` and the original training data has
+        1000 rows, then the test data will have 100 rows, the validation data will contain 90 rows
+        and the training data will have 810 rows.
+
+        For regression based tasks, random sampling is used. For classification
+        tasks, stratified sampling is used. Forecasting does not currently
+        support specifying a test dataset using a train/test split.
+
+        If this parameter or the ``test_data`` parameter are not specified then
+        no test run will be executed automatically after model training is completed.
+
+        Defaults to None
+    :paramtype test_data_size: float
+    :return: A job object that can be submitted to an Azure ML compute for execution.
+    :rtype: RegressionJob
+    """
+    regression_job = RegressionJob(primary_metric=primary_metric, **kwargs)
+    regression_job.set_data(
+        training_data=training_data,
+        target_column_name=target_column_name,
+        weight_column_name=weight_column_name,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        n_cross_validations=n_cross_validations,
+        cv_split_column_names=cv_split_column_names,
+        test_data=test_data,
+        test_data_size=test_data_size,
+    )
+    regression_job.set_training(enable_model_explainability=enable_model_explainability)
+
+    return regression_job
+
+
+@pipeline_node_decorator
+def forecasting(
+    *,
+    training_data: Input,
+    target_column_name: str,
+    primary_metric: Optional[str] = None,
+    enable_model_explainability: Optional[bool] = None,
+    weight_column_name: Optional[str] = None,
+    validation_data: Optional[Input] = None,
+    validation_data_size: Optional[float] = None,
+    n_cross_validations: Optional[Union[str, int]] = None,
+    cv_split_column_names: Optional[List[str]] = None,
+    test_data: Optional[Input] = None,
+    test_data_size: Optional[float] = None,
+    forecasting_settings: Optional[ForecastingSettings] = None,
+    **kwargs,
+) -> ForecastingJob:
+    """Function to create a Forecasting job.
+
+    A forecasting task is used to predict target values for a future time period based on the historical data.
+    Various models are trained using the training data. The model with the best performance on the validation data
+    based on the primary metric is selected as the final model.
+
+    :keyword training_data: The training data to be used within the experiment.
+        It should contain both training features and a label column (optionally a sample weights column).
+    :paramtype training_data: Input
+    :keyword target_column_name: The name of the label column.
+        This parameter is applicable to ``training_data``, ``validation_data`` and ``test_data`` parameters
+    :paramtype target_column_name: str
+    :keyword primary_metric: The metric that Automated Machine Learning will optimize for model selection.
+        Automated Machine Learning collects more metrics than it can optimize.
+        For more information on how metrics are calculated, see
+        https://learn.microsoft.com/azure/machine-learning/how-to-configure-auto-train#primary-metric.
+
+        Acceptable values: r2_score, normalized_mean_absolute_error, normalized_root_mean_squared_error
+        Defaults to normalized_root_mean_squared_error
+    :type primary_metric: str
+    :keyword enable_model_explainability: Whether to enable explaining the best AutoML model at the end of all AutoML
+        training iterations.
+        The default is None. For more information, see
+        `Interpretability: model explanations in automated machine learning
+        <https://learn.microsoft.com/azure/machine-learning/how-to-machine-learning-interpretability-automl>`__.
+    :paramtype enable_model_explainability: bool
+    :keyword weight_column_name: The name of the sample weight column. Automated ML supports a weighted column
+        as an input, causing rows in the data to be weighted up or down.
+        If the input data is from a pandas.DataFrame which doesn't have column names,
+        column indices can be used instead, expressed as integers.
+
+        This parameter is applicable to ``training_data`` and ``validation_data`` parameters
+    :paramtype weight_column_name: str
+    :keyword validation_data: The validation data to be used within the experiment.
+        It should contain both training features and label column (optionally a sample weights column).
+
+        Defaults to None
+    :paramtype validation_data: Input
+    :keyword validation_data_size: What fraction of the data to hold out for validation when user validation data
+        is not specified. This should be between 0.0 and 1.0 non-inclusive.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+        ``validation_data_size`` to extract validation data out of the specified training data.
+        For custom cross validation fold, use ``cv_split_column_names``.
+
+        For more information, see
+        `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+        /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+        Defaults to None
+    :paramtype validation_data_size: float
+    :keyword n_cross_validations: How many cross validations to perform when user validation data is not specified.
+
+        Specify ``validation_data`` to provide validation data, otherwise set ``n_cross_validations`` or
+        ``validation_data_size`` to extract validation data out of the specified training data.
+        For custom cross validation fold, use ``cv_split_column_names``.
+
+        For more information, see
+        `Configure data splits and cross-validation in automated machine learning <https://learn.microsoft.com
+        /azure/machine-learning/how-to-configure-cross-validation-data-splits>`__.
+
+        Defaults to None
+    :paramtype n_cross_validations: Union[str, int]
+    :keyword cv_split_column_names: List of names of the columns that contain custom cross validation split.
+        Each of the CV split columns represents one CV split where each row is either marked
+        1 for training or 0 for validation.
+
+        Defaults to None
+    :paramtype cv_split_column_names: List[str]
+    :keyword test_data: The Model Test feature using test datasets or test data splits is a feature in
+        Preview state and might change at any time.
+        The test data to be used for a test run that will automatically be started after
+        model training is complete. The test run will get predictions using the best model
+        and will compute metrics given these predictions.
+
+        If this parameter or the ``test_data_size`` parameter are not specified then
+        no test run will be executed automatically after model training is completed.
+        Test data should contain both features and label column.
+        If ``test_data`` is specified then the ``target_column_name`` parameter must be specified.
+
+        Defaults to None
+    :paramtype test_data: Input
+    :keyword test_data_size: The Model Test feature using test datasets or test data splits is a feature in
+        Preview state and might change at any time.
+        What fraction of the training data to hold out for test data for a test run that will
+        automatically be started after model training is complete. The test run will get
+        predictions using the best model and will compute metrics given these predictions.
+
+        This should be between 0.0 and 1.0 non-inclusive.
+        If ``test_data_size`` is specified at the same time as ``validation_data_size``,
+        then the test data is split from ``training_data`` before the validation data is split.
+        For example, if ``validation_data_size=0.1``, ``test_data_size=0.1`` and the original training data
+        has 1000 rows, then the test data will have 100 rows, the validation data will contain 90 rows
+        and the training data will have 810 rows.
+
+        For regression based tasks, random sampling is used. For classification
+        tasks, stratified sampling is used. Forecasting does not currently
+        support specifying a test dataset using a train/test split.
+
+        If this parameter or the ``test_data`` parameter are not specified then
+        no test run will be executed automatically after model training is completed.
+
+        Defaults to None
+    :paramtype test_data_size: float
+    :keyword forecasting_settings: The settings for the forecasting task
+    :paramtype forecasting_settings: ForecastingSettings
+    :return: A job object that can be submitted to an Azure ML compute for execution.
+    :rtype: ForecastingJob
+    """
+    forecast_job = ForecastingJob(
+        primary_metric=primary_metric,
+        forecasting_settings=forecasting_settings,
+        **kwargs,
+    )
+    forecast_job.set_data(
+        training_data=training_data,
+        target_column_name=target_column_name,
+        weight_column_name=weight_column_name,
+        validation_data=validation_data,
+        validation_data_size=validation_data_size,
+        n_cross_validations=n_cross_validations,
+        cv_split_column_names=cv_split_column_names,
+        test_data=test_data,
+        test_data_size=test_data_size,
+    )
+    forecast_job.set_training(enable_model_explainability=enable_model_explainability)
+
+    return forecast_job
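
The tabular entrypoints above all follow the same pattern: construct the job from the primary metric and remaining kwargs, then call set_data and set_training on it. A hedged sketch of driving automl.classification from user code (the data path, column names and limit values are assumptions, not part of this commit):

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    # Placeholder MLTable folder and target column.
    classification_job = automl.classification(
        training_data=Input(type=AssetTypes.MLTABLE, path="./data/bankmarketing/train"),
        target_column_name="y",
        primary_metric="AUC_weighted",
        n_cross_validations=5,
        enable_model_explainability=True,
    )
    # Optional run limits (backed by TabularLimitSettings); values here are illustrative.
    classification_job.set_limits(timeout_minutes=60, max_trials=20)
    # Submit with ml_client.jobs.create_or_update(classification_job) as shown earlier.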