| author | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
|---|---|---|
| committer | S. Solomon Darnell | 2025-03-28 21:52:21 -0500 |
| commit | 4a52a71956a8d46fcb7294ac71734504bb09bcc2 | (patch) |
| tree | ee3dc5af3b6313e921cd920906356f5d4febc4ed | .venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp |
| parent | cc961e04ba734dd72309fb548a2f97d67d578813 | (diff) |
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp')
10 files changed, 1716 insertions, 0 deletions
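Before the file-by-file diff, a brief orientation: the ten files below add the AutoML NLP job entities (jobs, limits, sweep settings, search space, featurization, and fixed parameters). The sketch that follows shows how they compose. It is illustrative only — the workspace identifiers, column name, and MLTable paths are placeholders, and it assumes the public re-exports under `azure.ai.ml.automl` that these files' docstrings reference.

```python
from azure.ai.ml import Input, MLClient
from azure.ai.ml.automl import TextClassificationJob
from azure.ai.ml.constants import AssetTypes
from azure.identity import DefaultAzureCredential

# Placeholder workspace handle -- substitute real identifiers.
ml_client = MLClient(
    DefaultAzureCredential(), "<subscription-id>", "<resource-group>", "<workspace-name>"
)

# TextClassificationJob (added below) defaults primary_metric to accuracy.
job = TextClassificationJob(
    target_column_name="label",  # placeholder column name
    training_data=Input(type=AssetTypes.MLTABLE, path="./data/train"),  # placeholder MLTable
    validation_data=Input(type=AssetTypes.MLTABLE, path="./data/valid"),
)
job.set_limits(max_trials=4, max_concurrent_trials=2, timeout_minutes=120)

returned_job = ml_client.jobs.create_or_update(job)  # submit to the workspace
```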
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py new file mode 100644 index 00000000..9be7b483 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/__init__.py @@ -0,0 +1,25 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- + +from .automl_nlp_job import AutoMLNLPJob +from .nlp_featurization_settings import NlpFeaturizationSettings +from .nlp_fixed_parameters import NlpFixedParameters +from .nlp_limit_settings import NlpLimitSettings +from .nlp_search_space import NlpSearchSpace +from .nlp_sweep_settings import NlpSweepSettings +from .text_classification_job import TextClassificationJob +from .text_classification_multilabel_job import TextClassificationMultilabelJob +from .text_ner_job import TextNerJob + +__all__ = [ + "AutoMLNLPJob", + "NlpFeaturizationSettings", + "NlpFixedParameters", + "NlpLimitSettings", + "NlpSearchSpace", + "NlpSweepSettings", + "TextClassificationJob", + "TextClassificationMultilabelJob", + "TextNerJob", +] diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py new file mode 100644 index 00000000..f0b3baa8 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/automl_nlp_job.py @@ -0,0 +1,467 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. +# --------------------------------------------------------- +from abc import ABC +from typing import Any, Dict, List, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import ( + LogVerbosity, + NlpLearningRateScheduler, + SamplingAlgorithmType, +) +from azure.ai.ml._utils.utils import camel_to_snake +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job.automl.automl_vertical import AutoMLVertical +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_search_space import NlpSearchSpace +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._job.automl.search_space import SearchSpace +from azure.ai.ml.entities._job.automl.utils import cast_to_specific_search_space +from azure.ai.ml.entities._job.sweep.early_termination_policy import EarlyTerminationPolicy +from azure.ai.ml.exceptions import ErrorCategory, ErrorTarget, ValidationException + + +# pylint: disable=too-many-instance-attributes,protected-access +class AutoMLNLPJob(AutoMLVertical, ABC): + """Base class for AutoML NLP jobs. + + You should not instantiate this class directly. Instead you should + create classes for specific NLP Jobs. 
+
+    :param task_type: NLP task type, must be one of 'TextClassification',
+        'TextClassificationMultilabel', or 'TextNER'
+    :type task_type: str
+    :param primary_metric: Primary metric to display from NLP job
+    :type primary_metric: str
+    :param training_data: Training data
+    :type training_data: Input
+    :param validation_data: Validation data
+    :type validation_data: Input
+    :param target_column_name: Column name of the target column, defaults to None
+    :type target_column_name: Optional[str]
+    :param log_verbosity: The degree of verbosity used in logging, defaults to None,
+        must be one of 'NotSet', 'Debug', 'Info', 'Warning', 'Error', 'Critical', or None
+    :type log_verbosity: Optional[str]
+    :param featurization: Featurization settings used for NLP job, defaults to None
+    :type featurization: Optional[~azure.ai.ml.automl.NlpFeaturizationSettings]
+    :param limits: Limit settings for NLP jobs, defaults to None
+    :type limits: Optional[~azure.ai.ml.automl.NlpLimitSettings]
+    :param sweep: Sweep settings used for NLP job, defaults to None
+    :type sweep: Optional[~azure.ai.ml.automl.NlpSweepSettings]
+    :param training_parameters: Fixed parameters for the training of all candidates, defaults to None
+    :type training_parameters: Optional[~azure.ai.ml.automl.NlpFixedParameters]
+    :param search_space: Search space(s) to sweep over for NLP sweep jobs, defaults to None
+    :type search_space: Optional[List[~azure.ai.ml.automl.NlpSearchSpace]]
+    """
+
+    def __init__(
+        self,
+        *,
+        task_type: str,
+        primary_metric: str,
+        training_data: Optional[Input],
+        validation_data: Optional[Input],
+        target_column_name: Optional[str] = None,
+        log_verbosity: Optional[str] = None,
+        featurization: Optional[NlpFeaturizationSettings] = None,
+        limits: Optional[NlpLimitSettings] = None,
+        sweep: Optional[NlpSweepSettings] = None,
+        training_parameters: Optional[NlpFixedParameters] = None,
+        search_space: Optional[List[NlpSearchSpace]] = None,
+        **kwargs: Any,
+    ):
+        self._training_parameters: Optional[NlpFixedParameters] = None
+
+        super().__init__(
+            task_type, training_data=training_data, validation_data=validation_data, **kwargs  # type: ignore
+        )
+        self.log_verbosity = log_verbosity
+        self._primary_metric: str = ""
+        self.primary_metric = primary_metric
+
+        self.target_column_name = target_column_name
+
+        self._featurization = featurization
+        self._limits = limits or NlpLimitSettings()
+        self._sweep = sweep
+        self.training_parameters = training_parameters  # via setter method.
+        self._search_space = search_space
+
+    @property
+    def training_parameters(self) -> Optional[NlpFixedParameters]:
+        """Parameters that are used for all submitted jobs.
+
+        :return: fixed training parameters for NLP jobs
+        :rtype: ~azure.ai.ml.automl.NlpFixedParameters
+        """
+        return self._training_parameters
+
+    @training_parameters.setter
+    def training_parameters(self, value: Union[Dict, NlpFixedParameters]) -> None:
+        if value is None:
+            self._training_parameters = None
+        elif isinstance(value, NlpFixedParameters):
+            self._training_parameters = value
+            # Convert parameters from snake case to enum.
+            self.set_training_parameters(learning_rate_scheduler=value.learning_rate_scheduler)
+        else:
+            if not isinstance(value, dict):
+                msg = "Expected a dictionary for nlp training parameters."
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_training_parameters(**value) + + @property + def search_space(self) -> Optional[List[NlpSearchSpace]]: + """Search space(s) to sweep over for NLP sweep jobs + + :return: list of search spaces to sweep over for NLP jobs + :rtype: List[~azure.ai.ml.automl.NlpSearchSpace] + """ + return self._search_space + + @search_space.setter + def search_space(self, value: Union[List[dict], List[SearchSpace]]) -> None: + if not isinstance(value, list): + msg = "Expected a list for search space." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + all_dict_type = all(isinstance(item, dict) for item in value) + all_search_space_type = all(isinstance(item, SearchSpace) for item in value) + + if not (all_search_space_type or all_dict_type): + msg = "Expected all items in the list to be either dictionaries or SearchSpace objects." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + + self._search_space = [ + cast_to_specific_search_space(item, NlpSearchSpace, self.task_type) for item in value # type: ignore + ] + + @property + def primary_metric(self) -> str: + """Primary metric to display from NLP job + + :return: primary metric to display + :rtype: str + """ + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: str) -> None: + self._primary_metric = value + + @property + def log_verbosity(self) -> LogVerbosity: + """Log verbosity configuration + + :return: the degree of verbosity used in logging + :rtype: ~azure.mgmt.machinelearningservices.models.LogVerbosity + """ + return self._log_verbosity + + @log_verbosity.setter + def log_verbosity(self, value: Union[str, LogVerbosity]) -> None: + self._log_verbosity = None if value is None else LogVerbosity[camel_to_snake(value).upper()] + + @property + def limits(self) -> NlpLimitSettings: + """Limit settings for NLP jobs + + :return: limit configuration for NLP job + :rtype: ~azure.ai.ml.automl.NlpLimitSettings + """ + return self._limits + + @limits.setter + def limits(self, value: Union[Dict, NlpLimitSettings]) -> None: + if isinstance(value, NlpLimitSettings): + self._limits = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for limit settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_limits(**value) + + @property + def sweep(self) -> Optional[NlpSweepSettings]: + """Sweep settings used for NLP job + + :return: sweep settings + :rtype: ~azure.ai.ml.automl.NlpSweepSettings + """ + return self._sweep + + @sweep.setter + def sweep(self, value: Union[Dict, NlpSweepSettings]) -> None: + if isinstance(value, NlpSweepSettings): + self._sweep = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for sweep settings." 
+ raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_sweep(**value) + + @property + def featurization(self) -> Optional[NlpFeaturizationSettings]: + """Featurization settings used for NLP job + + :return: featurization settings + :rtype: ~azure.ai.ml.automl.NlpFeaturizationSettings + """ + return self._featurization + + @featurization.setter + def featurization(self, value: Union[Dict, NlpFeaturizationSettings]) -> None: + if isinstance(value, NlpFeaturizationSettings): + self._featurization = value + else: + if not isinstance(value, dict): + msg = "Expected a dictionary for featurization settings." + raise ValidationException( + message=msg, + no_personal_data_message=msg, + target=ErrorTarget.AUTOML, + error_category=ErrorCategory.USER_ERROR, + ) + self.set_featurization(**value) + + def set_data(self, *, training_data: Input, target_column_name: str, validation_data: Input) -> None: + """Define data configuration for NLP job + + :keyword training_data: Training data + :type training_data: ~azure.ai.ml.Input + :keyword target_column_name: Column name of the target column. + :type target_column_name: str + :keyword validation_data: Validation data + :type validation_data: ~azure.ai.ml.Input + """ + # Properties for NlpVerticalDataSettings + self.target_column_name = target_column_name + self.training_data = training_data + self.validation_data = validation_data + + def set_limits( + self, + *, + max_trials: int = 1, + max_concurrent_trials: int = 1, + max_nodes: int = 1, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ) -> None: + """Define limit configuration for AutoML NLP job + + :keyword max_trials: Maximum number of AutoML iterations, defaults to 1 + :type max_trials: int, optional + :keyword max_concurrent_trials: Maximum number of concurrent AutoML iterations, defaults to 1 + :type max_concurrent_trials: int, optional + :keyword max_nodes: Maximum number of nodes used for sweep, defaults to 1 + :type max_nodes: int, optional + :keyword timeout_minutes: Timeout for the AutoML job, defaults to None + :type timeout_minutes: Optional[int] + :keyword trial_timeout_minutes: Timeout for each AutoML trial, defaults to None + :type trial_timeout_minutes: Optional[int] + """ + self._limits = NlpLimitSettings( + max_trials=max_trials, + max_concurrent_trials=max_concurrent_trials, + max_nodes=max_nodes, + timeout_minutes=timeout_minutes, + trial_timeout_minutes=trial_timeout_minutes, + ) + + def set_sweep( + self, + *, + sampling_algorithm: Union[str, SamplingAlgorithmType], + early_termination: Optional[EarlyTerminationPolicy] = None, + ) -> None: + """Define sweep configuration for AutoML NLP job + + :keyword sampling_algorithm: Required. Specifies type of hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", and "Bayesian". + :type sampling_algorithm: Union[str, ~azure.ai.ml.automl.SamplingAlgorithmType] + :keyword early_termination: Optional. early termination policy to end poorly performing training candidates, + defaults to None. 
+        :type early_termination: Optional[~azure.mgmt.machinelearningservices.models.EarlyTerminationPolicy]
+        """
+        if self._sweep:
+            self._sweep.sampling_algorithm = sampling_algorithm
+        else:
+            self._sweep = NlpSweepSettings(sampling_algorithm=sampling_algorithm)
+
+        self._sweep.early_termination = early_termination or self._sweep.early_termination
+
+    def set_training_parameters(
+        self,
+        *,
+        gradient_accumulation_steps: Optional[int] = None,
+        learning_rate: Optional[float] = None,
+        learning_rate_scheduler: Optional[Union[str, NlpLearningRateScheduler]] = None,
+        model_name: Optional[str] = None,
+        number_of_epochs: Optional[int] = None,
+        training_batch_size: Optional[int] = None,
+        validation_batch_size: Optional[int] = None,
+        warmup_ratio: Optional[float] = None,
+        weight_decay: Optional[float] = None,
+    ) -> None:
+        """Fix certain training parameters throughout the training procedure for all candidates.
+
+        :keyword gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward
+            pass. This must be a positive integer, defaults to None
+        :type gradient_accumulation_steps: Optional[int]
+        :keyword learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None
+        :type learning_rate: Optional[float]
+        :keyword learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine',
+            'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None
+        :type learning_rate_scheduler: Optional[Union[str, ~azure.ai.ml.automl.NlpLearningRateScheduler]]
+        :keyword model_name: the model name to use during training. Must choose from 'bert-base-cased',
+            'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased',
+            'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large',
+            'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', 'xlnet-base-cased', and 'xlnet-large-cased',
+            defaults to None
+        :type model_name: Optional[str]
+        :keyword number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None
+        :type number_of_epochs: Optional[int]
+        :keyword training_batch_size: the batch size during training. Must be a positive integer, defaults to None
+        :type training_batch_size: Optional[int]
+        :keyword validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None
+        :type validation_batch_size: Optional[int]
+        :keyword warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate.
+            Must be a float in [0, 1], defaults to None
+        :type warmup_ratio: Optional[float]
+        :keyword weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in
+            the range [0, 1], defaults to None
+        :type weight_decay: Optional[float]
+        """
+        self._training_parameters = self._training_parameters or NlpFixedParameters()
+
+        self._training_parameters.gradient_accumulation_steps = (
+            gradient_accumulation_steps
+            if gradient_accumulation_steps is not None
+            else self._training_parameters.gradient_accumulation_steps
+        )
+
+        self._training_parameters.learning_rate = (
+            learning_rate if learning_rate is not None else self._training_parameters.learning_rate
+        )
+
+        self._training_parameters.learning_rate_scheduler = (
+            NlpLearningRateScheduler[camel_to_snake(learning_rate_scheduler).upper()]
+            if learning_rate_scheduler is not None
+            else self._training_parameters.learning_rate_scheduler
+        )
+
+        self._training_parameters.model_name = (
+            model_name if model_name is not None else self._training_parameters.model_name
+        )
+
+        self._training_parameters.number_of_epochs = (
+            number_of_epochs if number_of_epochs is not None else self._training_parameters.number_of_epochs
+        )
+
+        self._training_parameters.training_batch_size = (
+            training_batch_size if training_batch_size is not None else self._training_parameters.training_batch_size
+        )
+
+        self._training_parameters.validation_batch_size = (
+            validation_batch_size
+            if validation_batch_size is not None
+            else self._training_parameters.validation_batch_size
+        )
+
+        self._training_parameters.warmup_ratio = (
+            warmup_ratio if warmup_ratio is not None else self._training_parameters.warmup_ratio
+        )
+
+        self._training_parameters.weight_decay = (
+            weight_decay if weight_decay is not None else self._training_parameters.weight_decay
+        )
+
+    def set_featurization(self, *, dataset_language: Optional[str] = None) -> None:
+        """Define featurization configuration for AutoML NLP job.
+
+        :keyword dataset_language: Language of the dataset, defaults to None
+        :type dataset_language: Optional[str]
+        """
+        self._featurization = NlpFeaturizationSettings(
+            dataset_language=dataset_language,
+        )
+
+    def extend_search_space(self, value: Union[SearchSpace, List[SearchSpace]]) -> None:
+        """Add (a) search space(s) for an AutoML NLP job.
+
+        :param value: either a SearchSpace object or a list of SearchSpace objects with nlp-specific parameters.
+        :type value: Union[~azure.ai.ml.automl.SearchSpace, List[~azure.ai.ml.automl.SearchSpace]]
+        """
+        self._search_space = self._search_space or []
+        if isinstance(value, list):
+            self._search_space.extend(
+                [cast_to_specific_search_space(item, NlpSearchSpace, self.task_type) for item in value]  # type: ignore
+            )
+        else:
+            self._search_space.append(
+                cast_to_specific_search_space(value, NlpSearchSpace, self.task_type)  # type: ignore
+            )
+
+    @classmethod
+    def _get_search_space_from_str(cls, search_space_str: Optional[str]) -> Optional[List]:
+        if search_space_str is not None:
+            return [NlpSearchSpace._from_rest_object(entry) for entry in search_space_str if entry is not None]
+        return None
+
+    def _restore_data_inputs(self) -> None:
+        """Restore MLTableJobInputs to Inputs within data_settings.
+
+        self.training_data and self.validation_data should reflect what the user passed in (Input). Once we get the
+        response back from the service (as MLTableJobInput), we should set the affected ones back to Input.
+        """
+        super()._restore_data_inputs()
+        self.training_data = self.training_data if self.training_data else None  # type: ignore
+        self.validation_data = self.validation_data if self.validation_data else None  # type: ignore
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, AutoMLNLPJob):
+            return NotImplemented
+
+        return (
+            self.primary_metric == other.primary_metric
+            and self.log_verbosity == other.log_verbosity
+            and self.training_data == other.training_data
+            and self.validation_data == other.validation_data
+            and self._featurization == other._featurization
+            and self._limits == other._limits
+            and self._sweep == other._sweep
+            and self._training_parameters == other._training_parameters
+            and self._search_space == other._search_space
+        )
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py
new file mode 100644
index 00000000..5649dea2
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_featurization_settings.py
@@ -0,0 +1,47 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import (
+    NlpVerticalFeaturizationSettings as RestNlpVerticalFeaturizationSettings,
+)
+from azure.ai.ml.entities._job.automl.featurization_settings import FeaturizationSettings, FeaturizationSettingsType
+
+
+class NlpFeaturizationSettings(FeaturizationSettings):
+    """Featurization settings for all AutoML NLP Verticals.
+
+    :ivar type: Specifies the type of FeaturizationSettings. Set automatically to "NLP" for this class.
+    :vartype type: str
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+            :start-after: [START automl.nlp_featurization_settings]
+            :end-before: [END automl.nlp_featurization_settings]
+            :language: python
+            :dedent: 8
+            :caption: creating an nlp featurization settings
+    """
+
+    type = FeaturizationSettingsType.NLP
+
+    def _to_rest_object(self) -> RestNlpVerticalFeaturizationSettings:
+        return RestNlpVerticalFeaturizationSettings(
+            dataset_language=self.dataset_language,
+        )
+
+    @classmethod
+    def _from_rest_object(cls, obj: RestNlpVerticalFeaturizationSettings) -> "NlpFeaturizationSettings":
+        return cls(
+            dataset_language=obj.dataset_language,
+        )
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, NlpFeaturizationSettings):
+            return NotImplemented
+
+        return super().__eq__(other)
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py
new file mode 100644
index 00000000..13c594b6
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_fixed_parameters.py
@@ -0,0 +1,117 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+from typing import Optional
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpFixedParameters as RestNlpFixedParameters
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+
+
+class NlpFixedParameters(RestTranslatableMixin):
+    """Configuration of fixed parameters for all candidates of an AutoML NLP Job.
+
+    :param gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward
+        pass. This must be a positive integer, defaults to None
+    :type gradient_accumulation_steps: Optional[int]
+    :param learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None
+    :type learning_rate: Optional[float]
+    :param learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine',
+        'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None
+    :type learning_rate_scheduler: Optional[str]
+    :param model_name: the model name to use during training. Must choose from 'bert-base-cased',
+        'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased',
+        'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large',
+        'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', 'xlnet-base-cased', and 'xlnet-large-cased',
+        defaults to None
+    :type model_name: Optional[str]
+    :param number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None
+    :type number_of_epochs: Optional[int]
+    :param training_batch_size: the batch size during training. Must be a positive integer, defaults to None
+    :type training_batch_size: Optional[int]
+    :param validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None
+    :type validation_batch_size: Optional[int]
+    :param warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate.
+        Must be a float in [0, 1], defaults to None
+    :type warmup_ratio: Optional[float]
+    :param weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in
+        the range [0, 1], defaults to None
+    :type weight_decay: Optional[float]
+
+    .. admonition:: Example:
+
+        ..
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_fixed_parameters] + :end-before: [END automl.nlp_fixed_parameters] + :language: python + :dedent: 8 + :caption: creating an nlp fixed parameters + """ + + def __init__( + self, + *, + gradient_accumulation_steps: Optional[int] = None, + learning_rate: Optional[float] = None, + learning_rate_scheduler: Optional[str] = None, + model_name: Optional[str] = None, + number_of_epochs: Optional[int] = None, + training_batch_size: Optional[int] = None, + validation_batch_size: Optional[int] = None, + warmup_ratio: Optional[float] = None, + weight_decay: Optional[float] = None, + ): + self.gradient_accumulation_steps = gradient_accumulation_steps + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.number_of_epochs = number_of_epochs + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_ratio = warmup_ratio + self.weight_decay = weight_decay + + def _to_rest_object(self) -> RestNlpFixedParameters: + return RestNlpFixedParameters( + gradient_accumulation_steps=self.gradient_accumulation_steps, + learning_rate=self.learning_rate, + learning_rate_scheduler=self.learning_rate_scheduler, + model_name=self.model_name, + number_of_epochs=self.number_of_epochs, + training_batch_size=self.training_batch_size, + validation_batch_size=self.validation_batch_size, + warmup_ratio=self.warmup_ratio, + weight_decay=self.weight_decay, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpFixedParameters) -> "NlpFixedParameters": + return cls( + gradient_accumulation_steps=obj.gradient_accumulation_steps, + learning_rate=obj.learning_rate, + learning_rate_scheduler=obj.learning_rate_scheduler, + model_name=obj.model_name, + number_of_epochs=obj.number_of_epochs, + training_batch_size=obj.training_batch_size, + validation_batch_size=obj.validation_batch_size, + warmup_ratio=obj.warmup_ratio, + weight_decay=obj.weight_decay, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpFixedParameters): + return NotImplemented + + return ( + self.gradient_accumulation_steps == other.gradient_accumulation_steps + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.number_of_epochs == other.number_of_epochs + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_ratio == other.warmup_ratio + and self.weight_decay == other.weight_decay + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py new file mode 100644 index 00000000..1e99f4f0 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_limit_settings.py @@ -0,0 +1,79 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
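The `nlp_limit_settings.py` diff beginning above keeps limits in plain minutes on the client side and converts them to ISO-8601 durations (via `to_iso_duration_format_mins`) when building the REST object. A hedged usage sketch, reusing the `job` object from the earlier example; the values are illustrative:

```python
from azure.ai.ml.automl import NlpLimitSettings

# Client-side values are plain minutes; _to_rest_object() serializes them
# as ISO-8601 durations (e.g. 240 minutes -> "PT4H").
limits = NlpLimitSettings(
    max_trials=8,
    max_concurrent_trials=2,
    max_nodes=4,
    timeout_minutes=240,
    trial_timeout_minutes=60,
)
job.limits = limits  # equivalent to job.set_limits(max_trials=8, ...)
```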
+# --------------------------------------------------------- + +from typing import Optional + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpVerticalLimitSettings as RestNlpLimitSettings +from azure.ai.ml._utils.utils import from_iso_duration_format_mins, to_iso_duration_format_mins +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +class NlpLimitSettings(RestTranslatableMixin): + """Limit settings for all AutoML NLP Verticals. + + :param max_concurrent_trials: Maximum number of concurrent AutoML iterations. + :type max_concurrent_trials: int + :param max_trials: Maximum number of AutoML iterations. + :type max_trials: int + :param timeout_minutes: AutoML job timeout. + :type timeout_minutes: int + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_limit_settings] + :end-before: [END automl.nlp_limit_settings] + :language: python + :dedent: 8 + :caption: creating an nlp limit settings + """ + + def __init__( + self, + *, + max_concurrent_trials: Optional[int] = None, + max_trials: int = 1, + max_nodes: int = 1, + timeout_minutes: Optional[int] = None, + trial_timeout_minutes: Optional[int] = None, + ): + self.max_concurrent_trials = max_concurrent_trials + self.max_trials = max_trials + self.max_nodes = max_nodes + self.timeout_minutes = timeout_minutes + self.trial_timeout_minutes = trial_timeout_minutes + + def _to_rest_object(self) -> RestNlpLimitSettings: + return RestNlpLimitSettings( + max_concurrent_trials=self.max_concurrent_trials, + max_trials=self.max_trials, + max_nodes=self.max_nodes, + timeout=to_iso_duration_format_mins(self.timeout_minutes), + trial_timeout=to_iso_duration_format_mins(self.trial_timeout_minutes), + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpLimitSettings) -> "NlpLimitSettings": + return cls( + max_concurrent_trials=obj.max_concurrent_trials, + max_trials=obj.max_trials, + max_nodes=obj.max_nodes, + timeout_minutes=from_iso_duration_format_mins(obj.timeout), + trial_timeout_minutes=from_iso_duration_format_mins(obj.trial_timeout), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpLimitSettings): + return NotImplemented + + return ( + self.max_concurrent_trials == other.max_concurrent_trials + and self.max_trials == other.max_trials + and self.max_nodes == other.max_nodes + and self.timeout_minutes == other.timeout_minutes + and self.trial_timeout_minutes == other.trial_timeout_minutes + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py new file mode 100644 index 00000000..e4ad435f --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_search_space.py @@ -0,0 +1,185 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
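`nlp_search_space.py`, whose diff starts above, normalizes enum values (`NlpModels`, `NlpLearningRateScheduler`) to their string forms, including inside `Choice` distributions. A sketch of defining and attaching a subspace, assuming the distribution classes from `azure.ai.ml.sweep` and the `job` object from earlier:

```python
from azure.ai.ml.automl import NlpSearchSpace
from azure.ai.ml.sweep import Choice, Uniform

# Fixed values and sweep distributions can be mixed in one subspace.
subspace = NlpSearchSpace(
    model_name=Choice(["bert-base-cased", "roberta-base"]),
    learning_rate=Uniform(5e-6, 5e-5),
    number_of_epochs=Choice([3, 4]),
)
job.extend_search_space([subspace])  # appends to any existing subspaces
```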
+# ---------------------------------------------------------
+
+from typing import Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpLearningRateScheduler, NlpParameterSubspace
+from azure.ai.ml._utils.utils import camel_to_snake
+from azure.ai.ml.constants import NlpModels
+from azure.ai.ml.entities._job.automl.search_space import SearchSpace
+from azure.ai.ml.entities._job.automl.search_space_utils import _convert_from_rest_object, _convert_to_rest_object
+from azure.ai.ml.entities._job.sweep.search_space import Choice, SweepDistribution
+from azure.ai.ml.entities._mixins import RestTranslatableMixin
+
+
+class NlpSearchSpace(RestTranslatableMixin):
+    """Search space for AutoML NLP tasks.
+
+    :param gradient_accumulation_steps: number of steps over which to accumulate gradients before a backward
+        pass. This must be a positive integer, defaults to None
+    :type gradient_accumulation_steps: Optional[Union[int, SweepDistribution]]
+    :param learning_rate: initial learning rate. Must be a float in (0, 1), defaults to None
+    :type learning_rate: Optional[Union[float, SweepDistribution]]
+    :param learning_rate_scheduler: the type of learning rate scheduler. Must choose from 'linear', 'cosine',
+        'cosine_with_restarts', 'polynomial', 'constant', and 'constant_with_warmup', defaults to None
+    :type learning_rate_scheduler: Optional[Union[str, SweepDistribution]]
+    :param model_name: the model name to use during training. Must choose from 'bert-base-cased',
+        'bert-base-uncased', 'bert-base-multilingual-cased', 'bert-base-german-cased', 'bert-large-cased',
+        'bert-large-uncased', 'distilbert-base-cased', 'distilbert-base-uncased', 'roberta-base', 'roberta-large',
+        'distilroberta-base', 'xlm-roberta-base', 'xlm-roberta-large', 'xlnet-base-cased', and 'xlnet-large-cased',
+        defaults to None
+    :type model_name: Optional[Union[str, SweepDistribution]]
+    :param number_of_epochs: the number of epochs to train with. Must be a positive integer, defaults to None
+    :type number_of_epochs: Optional[Union[int, SweepDistribution]]
+    :param training_batch_size: the batch size during training. Must be a positive integer, defaults to None
+    :type training_batch_size: Optional[Union[int, SweepDistribution]]
+    :param validation_batch_size: the batch size during validation. Must be a positive integer, defaults to None
+    :type validation_batch_size: Optional[Union[int, SweepDistribution]]
+    :param warmup_ratio: ratio of total training steps used for a linear warmup from 0 to learning_rate.
+        Must be a float in [0, 1], defaults to None
+    :type warmup_ratio: Optional[Union[float, SweepDistribution]]
+    :param weight_decay: value of weight decay when optimizer is sgd, adam, or adamw. This must be a float in
+        the range [0, 1], defaults to None
+    :type weight_decay: Optional[Union[float, SweepDistribution]]
+
+
+    .. admonition:: Example:
+
+        ..
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_search_space] + :end-before: [END automl.nlp_search_space] + :language: python + :dedent: 8 + :caption: creating an nlp search space + """ + + def __init__( + self, + *, + gradient_accumulation_steps: Optional[Union[int, SweepDistribution]] = None, + learning_rate: Optional[Union[float, SweepDistribution]] = None, + learning_rate_scheduler: Optional[Union[str, SweepDistribution]] = None, + model_name: Optional[Union[str, SweepDistribution]] = None, + number_of_epochs: Optional[Union[int, SweepDistribution]] = None, + training_batch_size: Optional[Union[int, SweepDistribution]] = None, + validation_batch_size: Optional[Union[int, SweepDistribution]] = None, + warmup_ratio: Optional[Union[float, SweepDistribution]] = None, + weight_decay: Optional[Union[float, SweepDistribution]] = None + ): + # Since we want customers to be able to specify enums as well rather than just strings, we need to access + # the enum values here before we serialize them ('NlpModels.BERT_BASE_CASED' vs. 'bert-base-cased'). + if isinstance(learning_rate_scheduler, NlpLearningRateScheduler): + learning_rate_scheduler = camel_to_snake(learning_rate_scheduler.value) + elif isinstance(learning_rate_scheduler, Choice): + if learning_rate_scheduler.values is not None: + learning_rate_scheduler.values = [ + camel_to_snake(item.value) if isinstance(item, NlpLearningRateScheduler) else item + for item in learning_rate_scheduler.values + ] + + if isinstance(model_name, NlpModels): + model_name = model_name.value + elif isinstance(model_name, Choice): + if model_name.values is not None: + model_name.values = [item.value if isinstance(item, NlpModels) else item for item in model_name.values] + + self.gradient_accumulation_steps = gradient_accumulation_steps + self.learning_rate = learning_rate + self.learning_rate_scheduler = learning_rate_scheduler + self.model_name = model_name + self.number_of_epochs = number_of_epochs + self.training_batch_size = training_batch_size + self.validation_batch_size = validation_batch_size + self.warmup_ratio = warmup_ratio + self.weight_decay = weight_decay + + def _to_rest_object(self) -> NlpParameterSubspace: + return NlpParameterSubspace( + gradient_accumulation_steps=( + _convert_to_rest_object(self.gradient_accumulation_steps) + if self.gradient_accumulation_steps is not None + else None + ), + learning_rate=_convert_to_rest_object(self.learning_rate) if self.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_to_rest_object(self.learning_rate_scheduler) + if self.learning_rate_scheduler is not None + else None + ), + model_name=_convert_to_rest_object(self.model_name) if self.model_name is not None else None, + number_of_epochs=( + _convert_to_rest_object(self.number_of_epochs) if self.number_of_epochs is not None else None + ), + training_batch_size=( + _convert_to_rest_object(self.training_batch_size) if self.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_to_rest_object(self.validation_batch_size) if self.validation_batch_size is not None else None + ), + warmup_ratio=_convert_to_rest_object(self.warmup_ratio) if self.warmup_ratio is not None else None, + weight_decay=_convert_to_rest_object(self.weight_decay) if self.weight_decay is not None else None, + ) + + @classmethod + def _from_rest_object(cls, obj: NlpParameterSubspace) -> "NlpSearchSpace": + return cls( + gradient_accumulation_steps=( + 
_convert_from_rest_object(obj.gradient_accumulation_steps) + if obj.gradient_accumulation_steps is not None + else None + ), + learning_rate=_convert_from_rest_object(obj.learning_rate) if obj.learning_rate is not None else None, + learning_rate_scheduler=( + _convert_from_rest_object(obj.learning_rate_scheduler) + if obj.learning_rate_scheduler is not None + else None + ), + model_name=_convert_from_rest_object(obj.model_name) if obj.model_name is not None else None, + number_of_epochs=( + _convert_from_rest_object(obj.number_of_epochs) if obj.number_of_epochs is not None else None + ), + training_batch_size=( + _convert_from_rest_object(obj.training_batch_size) if obj.training_batch_size is not None else None + ), + validation_batch_size=( + _convert_from_rest_object(obj.validation_batch_size) if obj.validation_batch_size is not None else None + ), + warmup_ratio=_convert_from_rest_object(obj.warmup_ratio) if obj.warmup_ratio is not None else None, + weight_decay=_convert_from_rest_object(obj.weight_decay) if obj.weight_decay is not None else None, + ) + + @classmethod + def _from_search_space_object(cls, obj: SearchSpace) -> "NlpSearchSpace": + return cls( + gradient_accumulation_steps=( + obj.gradient_accumulation_steps if hasattr(obj, "gradient_accumulation_steps") else None + ), + learning_rate=obj.learning_rate if hasattr(obj, "learning_rate") else None, + learning_rate_scheduler=obj.learning_rate_scheduler if hasattr(obj, "learning_rate_scheduler") else None, + model_name=obj.model_name if hasattr(obj, "model_name") else None, + number_of_epochs=obj.number_of_epochs if hasattr(obj, "number_of_epochs") else None, + training_batch_size=obj.training_batch_size if hasattr(obj, "training_batch_size") else None, + validation_batch_size=obj.validation_batch_size if hasattr(obj, "validation_batch_size") else None, + warmup_ratio=obj.warmup_ratio if hasattr(obj, "warmup_ratio") else None, + weight_decay=obj.weight_decay if hasattr(obj, "weight_decay") else None, + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpSearchSpace): + return NotImplemented + + return ( + self.gradient_accumulation_steps == other.gradient_accumulation_steps + and self.learning_rate == other.learning_rate + and self.learning_rate_scheduler == other.learning_rate_scheduler + and self.model_name == other.model_name + and self.number_of_epochs == other.number_of_epochs + and self.training_batch_size == other.training_batch_size + and self.validation_batch_size == other.validation_batch_size + and self.warmup_ratio == other.warmup_ratio + and self.weight_decay == other.weight_decay + ) + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py new file mode 100644 index 00000000..e446a30c --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/nlp_sweep_settings.py @@ -0,0 +1,65 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
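`nlp_sweep_settings.py`, whose diff begins above, pairs a sampling algorithm with an optional early-termination policy. A minimal sketch, assuming the `BanditPolicy` from `azure.ai.ml.sweep` and the `job` object from the earlier example:

```python
from azure.ai.ml.automl import NlpSweepSettings
from azure.ai.ml.sweep import BanditPolicy

sweep = NlpSweepSettings(
    sampling_algorithm="Random",  # "Grid" and "Bayesian" are the other documented options
    early_termination=BanditPolicy(slack_factor=0.1, evaluation_interval=2),
)
job.sweep = sweep  # or: job.set_sweep(sampling_algorithm="Random", early_termination=...)
```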
+# --------------------------------------------------------- + +from typing import Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import NlpSweepSettings as RestNlpSweepSettings +from azure.ai.ml._restclient.v2023_04_01_preview.models import SamplingAlgorithmType +from azure.ai.ml.entities._job.sweep.early_termination_policy import EarlyTerminationPolicy +from azure.ai.ml.entities._mixins import RestTranslatableMixin + + +# pylint: disable=protected-access +class NlpSweepSettings(RestTranslatableMixin): + """Sweep settings for all AutoML NLP tasks. + + :param sampling_algorithm: Required. Specifies type of hyperparameter sampling algorithm. + Possible values include: "Grid", "Random", and "Bayesian". + :type sampling_algorithm: Union[str, ~azure.ai.ml.automl.SamplingAlgorithmType] + :param early_termination: Early termination policy to end poorly performing training candidates, + defaults to None. + :type early_termination: Optional[~azure.mgmt.machinelearningservices.models.EarlyTerminationPolicy] + + .. admonition:: Example: + + .. literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.nlp_sweep_settings] + :end-before: [END automl.nlp_sweep_settings] + :language: python + :dedent: 8 + :caption: creating an nlp sweep settings + """ + + def __init__( + self, + *, + sampling_algorithm: Union[str, SamplingAlgorithmType], + early_termination: Optional[EarlyTerminationPolicy] = None, + ): + self.sampling_algorithm = sampling_algorithm + self.early_termination = early_termination + + def _to_rest_object(self) -> RestNlpSweepSettings: + return RestNlpSweepSettings( + sampling_algorithm=self.sampling_algorithm, + early_termination=self.early_termination._to_rest_object() if self.early_termination else None, + ) + + @classmethod + def _from_rest_object(cls, obj: RestNlpSweepSettings) -> "NlpSweepSettings": + return cls( + sampling_algorithm=obj.sampling_algorithm, + early_termination=( + EarlyTerminationPolicy._from_rest_object(obj.early_termination) if obj.early_termination else None + ), + ) + + def __eq__(self, other: object) -> bool: + if not isinstance(other, NlpSweepSettings): + return NotImplemented + + return self.sampling_algorithm == other.sampling_algorithm and self.early_termination == other.early_termination + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py new file mode 100644 index 00000000..290f4f70 --- /dev/null +++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_job.py @@ -0,0 +1,248 @@ +# --------------------------------------------------------- +# Copyright (c) Microsoft Corporation. All rights reserved. 
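Before the `text_classification_job.py` body below, one more sketch tying back to the `AutoMLNLPJob.training_parameters` setter shown earlier: it accepts either an `NlpFixedParameters` object or a plain dict (routed through `set_training_parameters`). The hyperparameter values here are illustrative only:

```python
from azure.ai.ml.automl import NlpFixedParameters

# Object form: pins hyperparameters for every candidate.
job.training_parameters = NlpFixedParameters(
    model_name="bert-base-uncased",
    learning_rate=2e-5,
    number_of_epochs=3,
    weight_decay=0.01,
)

# Dict form: forwarded as job.set_training_parameters(**value).
job.training_parameters = {"model_name": "bert-base-uncased", "learning_rate": 2e-5}
```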
+# --------------------------------------------------------- + +# pylint: disable=protected-access + +from typing import TYPE_CHECKING, Any, Dict, Optional, Union + +from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob +from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType +from azure.ai.ml._restclient.v2023_04_01_preview.models._azure_machine_learning_workspaces_enums import ( + ClassificationPrimaryMetrics, +) +from azure.ai.ml._restclient.v2024_01_01_preview.models import TextClassification as RestTextClassification +from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression +from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY +from azure.ai.ml.constants._job.automl import AutoMLConstants +from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration +from azure.ai.ml.entities._inputs_outputs import Input +from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs +from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob +from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters +from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings +from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings +from azure.ai.ml.entities._system_data import SystemData +from azure.ai.ml.entities._util import load_from_dict + +# avoid circular import error +if TYPE_CHECKING: + from azure.ai.ml.entities._component.component import Component + + +class TextClassificationJob(AutoMLNLPJob): + """Configuration for AutoML Text Classification Job. + + :param target_column_name: The name of the target column, defaults to None + :type target_column_name: Optional[str] + :param training_data: Training data to be used for training, defaults to None + :type training_data: Optional[~azure.ai.ml.Input] + :param validation_data: Validation data to be used for evaluating the trained model, defaults to None + :type validation_data: Optional[~azure.ai.ml.Input] + :param primary_metric: The primary metric to be displayed, defaults to None + :type primary_metric: Optional[~azure.ai.ml.automl.ClassificationPrimaryMetrics] + :param log_verbosity: Log verbosity level, defaults to None + :type log_verbosity: Optional[str] + + .. admonition:: Example: + + .. 
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.automl_nlp_job.text_classification_job] + :end-before: [END automl.automl_nlp_job.text_classification_job] + :language: python + :dedent: 8 + :caption: creating an automl text classification job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + target_column_name: Optional[str] = None, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[ClassificationPrimaryMetrics] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super().__init__( + task_type=TaskType.TEXT_CLASSIFICATION, + primary_metric=primary_metric or TextClassificationJob._DEFAULT_PRIMARY_METRIC, + target_column_name=target_column_name, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None: + """setter for primary metric + + :param value: _description_ + :type value: Union[str, ClassificationPrimaryMetrics] + """ + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextClassificationJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_classification = RestTextClassification( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_classification) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_classification, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextClassificationJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextClassification = properties.task_details + assert isinstance(task_details, RestTextClassification) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + 
if task_details.featurization_settings + else None + ) + sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None + training_parameters = ( + NlpFixedParameters._from_rest_object(task_details.fixed_parameters) + if task_details.fixed_parameters + else None + ) + + text_classification_job = cls( + # ----- job specific params + id=obj.id, + name=obj.name, + description=properties.description, + tags=properties.tags, + properties=properties.properties, + experiment_name=properties.experiment_name, + services=properties.services, + status=properties.status, + creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None, + display_name=properties.display_name, + compute=properties.compute_id, + outputs=from_rest_data_outputs(properties.outputs), + resources=properties.resources, + # ----- task specific params + primary_metric=task_details.primary_metric, + log_verbosity=task_details.log_verbosity, + target_column_name=task_details.target_column_name, + training_data=task_details.training_data, + validation_data=task_details.validation_data, + limits=limits, + sweep=sweep, + training_parameters=training_parameters, + search_space=cls._get_search_space_from_str(task_details.search_space), + featurization=featurization, + identity=( + _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None + ), + queue_settings=properties.queue_settings, + ) + + text_classification_job._restore_data_inputs() + + return text_classification_job + + def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component": + raise NotImplementedError() + + @classmethod + def _load_from_dict( + cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any + ) -> "TextClassificationJob": + from azure.ai.ml._schema.automl.nlp_vertical.text_classification import TextClassificationSchema + + if kwargs.pop("inside_pipeline", False): + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationNode + + loaded_data = load_from_dict( + AutoMLTextClassificationNode, + data, + context, + additional_message, + **kwargs, + ) + else: + loaded_data = load_from_dict(TextClassificationSchema, data, context, additional_message, **kwargs) + job_instance = cls._create_instance_from_schema_dict(loaded_data) + return job_instance + + @classmethod + def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextClassificationJob": + loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None) + return TextClassificationJob(**loaded_data) + + def _to_dict(self, inside_pipeline: bool = False) -> Dict: + from azure.ai.ml._schema.automl.nlp_vertical.text_classification import TextClassificationSchema + from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationNode + + if inside_pipeline: + res_autoML: dict = AutoMLTextClassificationNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res_autoML + + res: dict = TextClassificationSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self) + return res + + def __eq__(self, other: object) -> bool: + if not isinstance(other, TextClassificationJob): + return NotImplemented + + if not super(TextClassificationJob, self).__eq__(other): + return False + + return self.primary_metric == other.primary_metric + + def __ne__(self, other: object) -> bool: + return not self.__eq__(other) diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py 
b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py
new file mode 100644
index 00000000..ac19b451
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_classification_multilabel_job.py
@@ -0,0 +1,252 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob
+from azure.ai.ml._restclient.v2023_04_01_preview.models import ClassificationMultilabelPrimaryMetrics, JobBase, TaskType
+from azure.ai.ml._restclient.v2024_01_01_preview.models import (
+    TextClassificationMultilabel as RestTextClassificationMultilabel,
+)
+from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs
+from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob
+from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings
+from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters
+from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings
+from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings
+from azure.ai.ml.entities._system_data import SystemData
+from azure.ai.ml.entities._util import load_from_dict
+
+# avoid circular import error
+if TYPE_CHECKING:
+    from azure.ai.ml.entities._component.component import Component
+
+
+class TextClassificationMultilabelJob(AutoMLNLPJob):
+    """Configuration for AutoML Text Classification Multilabel Job.
+
+    :param target_column_name: The name of the target column, defaults to None
+    :type target_column_name: Optional[str]
+    :param training_data: Training data to be used for training, defaults to None
+    :type training_data: Optional[~azure.ai.ml.Input]
+    :param validation_data: Validation data to be used for evaluating the trained model, defaults to None
+    :type validation_data: Optional[~azure.ai.ml.Input]
+    :param primary_metric: The primary metric to be displayed, defaults to None
+    :type primary_metric: Optional[str]
+    :param log_verbosity: Log verbosity level, defaults to None
+    :type log_verbosity: Optional[str]
+
+    .. admonition:: Example:
+
+        ..
literalinclude:: ../samples/ml_samples_automl_nlp.py + :start-after: [START automl.text_classification_multilabel_job] + :end-before: [END automl.text_classification_multilabel_job] + :language: python + :dedent: 8 + :caption: creating an automl text classification multilabel job + """ + + _DEFAULT_PRIMARY_METRIC = ClassificationMultilabelPrimaryMetrics.ACCURACY + + def __init__( + self, + *, + target_column_name: Optional[str] = None, + training_data: Optional[Input] = None, + validation_data: Optional[Input] = None, + primary_metric: Optional[str] = None, + log_verbosity: Optional[str] = None, + **kwargs: Any + ): + super().__init__( + task_type=TaskType.TEXT_CLASSIFICATION_MULTILABEL, + primary_metric=primary_metric or TextClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC, + target_column_name=target_column_name, + training_data=training_data, + validation_data=validation_data, + log_verbosity=log_verbosity, + **kwargs, + ) + + @property + def primary_metric(self) -> Union[str, ClassificationMultilabelPrimaryMetrics]: + return self._primary_metric + + @primary_metric.setter + def primary_metric(self, value: Union[str, ClassificationMultilabelPrimaryMetrics]) -> None: + if is_data_binding_expression(str(value), ["parent"]): + self._primary_metric = value + return + + self._primary_metric = ( + TextClassificationMultilabelJob._DEFAULT_PRIMARY_METRIC + if value is None + else ClassificationMultilabelPrimaryMetrics[camel_to_snake(value).upper()] + ) + + def _to_rest_object(self) -> JobBase: + text_classification_multilabel = RestTextClassificationMultilabel( + target_column_name=self.target_column_name, + training_data=self.training_data, + validation_data=self.validation_data, + limit_settings=self._limits._to_rest_object() if self._limits else None, + sweep_settings=self._sweep._to_rest_object() if self._sweep else None, + fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None, + search_space=( + [entry._to_rest_object() for entry in self._search_space if entry is not None] + if self._search_space is not None + else None + ), + featurization_settings=self._featurization._to_rest_object() if self._featurization else None, + primary_metric=self.primary_metric, + log_verbosity=self.log_verbosity, + ) + # resolve data inputs in rest object + self._resolve_data_inputs(text_classification_multilabel) + + properties = RestAutoMLJob( + display_name=self.display_name, + description=self.description, + experiment_name=self.experiment_name, + tags=self.tags, + compute_id=self.compute, + properties=self.properties, + environment_id=self.environment_id, + environment_variables=self.environment_variables, + services=self.services, + outputs=to_rest_data_outputs(self.outputs), + resources=self.resources, + task_details=text_classification_multilabel, + identity=self.identity._to_job_rest_object() if self.identity else None, + queue_settings=self.queue_settings, + ) + + result = JobBase(properties=properties) + result.name = self.name + return result + + @classmethod + def _from_rest_object(cls, obj: JobBase) -> "TextClassificationMultilabelJob": + properties: RestAutoMLJob = obj.properties + task_details: RestTextClassificationMultilabel = properties.task_details + assert isinstance(task_details, RestTextClassificationMultilabel) + limits = ( + NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None + ) + featurization = ( + NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings) + if 
+    @classmethod
+    def _from_rest_object(cls, obj: JobBase) -> "TextClassificationMultilabelJob":
+        properties: RestAutoMLJob = obj.properties
+        task_details: RestTextClassificationMultilabel = properties.task_details
+        assert isinstance(task_details, RestTextClassificationMultilabel)
+        limits = (
+            NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None
+        )
+        featurization = (
+            NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings)
+            if task_details.featurization_settings
+            else None
+        )
+        sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None
+        training_parameters = (
+            NlpFixedParameters._from_rest_object(task_details.fixed_parameters)
+            if task_details.fixed_parameters
+            else None
+        )
+
+        text_classification_multilabel_job = cls(
+            # ----- job specific params
+            id=obj.id,
+            name=obj.name,
+            description=properties.description,
+            tags=properties.tags,
+            properties=properties.properties,
+            experiment_name=properties.experiment_name,
+            services=properties.services,
+            status=properties.status,
+            creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None,
+            display_name=properties.display_name,
+            compute=properties.compute_id,
+            outputs=from_rest_data_outputs(properties.outputs),
+            resources=properties.resources,
+            # ----- task specific params
+            primary_metric=task_details.primary_metric,
+            log_verbosity=task_details.log_verbosity,
+            target_column_name=task_details.target_column_name,
+            training_data=task_details.training_data,
+            validation_data=task_details.validation_data,
+            limits=limits,
+            sweep=sweep,
+            training_parameters=training_parameters,
+            search_space=cls._get_search_space_from_str(task_details.search_space),
+            featurization=featurization,
+            identity=(
+                _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None
+            ),
+            queue_settings=properties.queue_settings,
+        )
+
+        text_classification_multilabel_job._restore_data_inputs()
+
+        return text_classification_multilabel_job
+
+    def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component":
+        raise NotImplementedError()
+
+    @classmethod
+    def _load_from_dict(
+        cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any
+    ) -> "TextClassificationMultilabelJob":
+        from azure.ai.ml._schema.automl.nlp_vertical.text_classification_multilabel import (
+            TextClassificationMultilabelSchema,
+        )
+
+        if kwargs.pop("inside_pipeline", False):
+            from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationMultilabelNode
+
+            loaded_data = load_from_dict(
+                AutoMLTextClassificationMultilabelNode,
+                data,
+                context,
+                additional_message,
+                **kwargs,
+            )
+        else:
+            loaded_data = load_from_dict(
+                TextClassificationMultilabelSchema,
+                data,
+                context,
+                additional_message,
+                **kwargs,
+            )
+        job_instance = cls._create_instance_from_schema_dict(loaded_data)
+        return job_instance
+
+    @classmethod
+    def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextClassificationMultilabelJob":
+        loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None)
+        return TextClassificationMultilabelJob(**loaded_data)
+
+    def _to_dict(self, inside_pipeline: bool = False) -> Dict:
+        from azure.ai.ml._schema.automl.nlp_vertical.text_classification_multilabel import (
+            TextClassificationMultilabelSchema,
+        )
+        from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextClassificationMultilabelNode
+
+        if inside_pipeline:
+            res_autoML: dict = AutoMLTextClassificationMultilabelNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self)
+            return res_autoML
+
+        res: dict = TextClassificationMultilabelSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self)
+        return res
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, TextClassificationMultilabelJob):
+            return NotImplemented
+
+        if not super(TextClassificationMultilabelJob, self).__eq__(other):
+            return False
+
+        return self.primary_metric == other.primary_metric
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
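For orientation, a minimal sketch of how this entity is typically created through the public SDK surface, matching the literalinclude sample referenced in the docstring above. The workspace identifiers, compute name, target column, and MLTable paths are placeholders, and automl.text_classification_multilabel / set_limits are used as documented for the public azure-ai-ml API rather than taken from this diff.

    from azure.ai.ml import Input, MLClient, automl
    from azure.ai.ml.constants import AssetTypes
    from azure.identity import DefaultAzureCredential

    # Connect to a workspace; all three identifiers are placeholders.
    ml_client = MLClient(
        DefaultAzureCredential(),
        subscription_id="<subscription-id>",
        resource_group_name="<resource-group>",
        workspace_name="<workspace>",
    )

    # Build the multilabel job; the factory returns a TextClassificationMultilabelJob.
    job = automl.text_classification_multilabel(
        experiment_name="automl-nlp-multilabel",  # placeholder experiment name
        compute="gpu-cluster",                    # placeholder compute target
        target_column_name="terms",               # assumed label column
        training_data=Input(type=AssetTypes.MLTABLE, path="./train-mltable-folder"),
        validation_data=Input(type=AssetTypes.MLTABLE, path="./valid-mltable-folder"),
    )
    job.set_limits(timeout_minutes=120, max_nodes=1)

    # Submitting triggers _to_rest_object() above to serialize the job.
    returned_job = ml_client.jobs.create_or_update(job)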
diff --git a/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py
new file mode 100644
index 00000000..a87965f1
--- /dev/null
+++ b/.venv/lib/python3.12/site-packages/azure/ai/ml/entities/_job/automl/nlp/text_ner_job.py
@@ -0,0 +1,231 @@
+# ---------------------------------------------------------
+# Copyright (c) Microsoft Corporation. All rights reserved.
+# ---------------------------------------------------------
+
+# pylint: disable=protected-access
+
+from typing import TYPE_CHECKING, Any, Dict, Optional, Union
+
+from azure.ai.ml._restclient.v2023_04_01_preview.models import AutoMLJob as RestAutoMLJob
+from azure.ai.ml._restclient.v2023_04_01_preview.models import JobBase, TaskType
+from azure.ai.ml._restclient.v2023_04_01_preview.models._azure_machine_learning_workspaces_enums import (
+    ClassificationPrimaryMetrics,
+)
+from azure.ai.ml._restclient.v2024_01_01_preview.models import TextNer as RestTextNER
+from azure.ai.ml._utils.utils import camel_to_snake, is_data_binding_expression
+from azure.ai.ml.constants._common import BASE_PATH_CONTEXT_KEY
+from azure.ai.ml.constants._job.automl import AutoMLConstants
+from azure.ai.ml.entities._credentials import _BaseJobIdentityConfiguration
+from azure.ai.ml.entities._inputs_outputs import Input
+from azure.ai.ml.entities._job._input_output_helpers import from_rest_data_outputs, to_rest_data_outputs
+from azure.ai.ml.entities._job.automl.nlp.automl_nlp_job import AutoMLNLPJob
+from azure.ai.ml.entities._job.automl.nlp.nlp_featurization_settings import NlpFeaturizationSettings
+from azure.ai.ml.entities._job.automl.nlp.nlp_fixed_parameters import NlpFixedParameters
+from azure.ai.ml.entities._job.automl.nlp.nlp_limit_settings import NlpLimitSettings
+from azure.ai.ml.entities._job.automl.nlp.nlp_sweep_settings import NlpSweepSettings
+from azure.ai.ml.entities._system_data import SystemData
+from azure.ai.ml.entities._util import load_from_dict
+
+# avoid circular import error
+if TYPE_CHECKING:
+    from azure.ai.ml.entities._component.component import Component
+
+
+class TextNerJob(AutoMLNLPJob):
+    """Configuration for AutoML Text NER Job.
+
+    :param training_data: Training data to be used for training, defaults to None
+    :type training_data: Optional[~azure.ai.ml.Input]
+    :param validation_data: Validation data to be used for evaluating the trained model,
+        defaults to None
+    :type validation_data: Optional[~azure.ai.ml.Input]
+    :param primary_metric: The primary metric to be displayed, defaults to None
+    :type primary_metric: Optional[str]
+    :param log_verbosity: Log verbosity level, defaults to None
+    :type log_verbosity: Optional[str]
+
+    .. admonition:: Example:
+
+        .. literalinclude:: ../samples/ml_samples_automl_nlp.py
+            :start-after: [START automl.text_ner_job]
+            :end-before: [END automl.text_ner_job]
+            :language: python
+            :dedent: 8
+            :caption: creating an automl text ner job
+    """
+
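    # A small usage sketch under assumed inputs (shown as comments to keep the
    # class body valid; the MLTable folder paths are hypothetical), mirroring
    # the literalinclude sample referenced above. Note there is no
    # target_column_name: NER training data uses the fixed CoNLL format.
    #
    #     from azure.ai.ml import Input
    #     from azure.ai.ml.constants import AssetTypes
    #
    #     text_ner_job = TextNerJob(
    #         training_data=Input(type=AssetTypes.MLTABLE, path="./training-mltable-folder"),
    #         validation_data=Input(type=AssetTypes.MLTABLE, path="./validation-mltable-folder"),
    #     )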
+    _DEFAULT_PRIMARY_METRIC = ClassificationPrimaryMetrics.ACCURACY
+
+    def __init__(
+        self,
+        *,
+        training_data: Optional[Input] = None,
+        validation_data: Optional[Input] = None,
+        primary_metric: Optional[str] = None,
+        log_verbosity: Optional[str] = None,
+        **kwargs: Any
+    ):
+        super(TextNerJob, self).__init__(
+            task_type=TaskType.TEXT_NER,
+            primary_metric=primary_metric or TextNerJob._DEFAULT_PRIMARY_METRIC,
+            training_data=training_data,
+            validation_data=validation_data,
+            log_verbosity=log_verbosity,
+            **kwargs,
+        )
+
+    @property
+    def primary_metric(self) -> Union[str, ClassificationPrimaryMetrics]:
+        return self._primary_metric
+
+    @primary_metric.setter
+    def primary_metric(self, value: Union[str, ClassificationPrimaryMetrics]) -> None:
+        if is_data_binding_expression(str(value), ["parent"]):
+            self._primary_metric = value
+            return
+
+        self._primary_metric = (
+            TextNerJob._DEFAULT_PRIMARY_METRIC
+            if value is None
+            else ClassificationPrimaryMetrics[camel_to_snake(value).upper()]
+        )
+
+    def _to_rest_object(self) -> JobBase:
+        text_ner = RestTextNER(
+            training_data=self.training_data,
+            validation_data=self.validation_data,
+            limit_settings=self._limits._to_rest_object() if self._limits else None,
+            sweep_settings=self._sweep._to_rest_object() if self._sweep else None,
+            fixed_parameters=self._training_parameters._to_rest_object() if self._training_parameters else None,
+            search_space=(
+                [entry._to_rest_object() for entry in self._search_space if entry is not None]
+                if self._search_space is not None
+                else None
+            ),
+            featurization_settings=self._featurization._to_rest_object() if self._featurization else None,
+            primary_metric=self.primary_metric,
+            log_verbosity=self.log_verbosity,
+        )
+        # resolve data inputs in rest object
+        self._resolve_data_inputs(text_ner)
+
+        properties = RestAutoMLJob(
+            display_name=self.display_name,
+            description=self.description,
+            experiment_name=self.experiment_name,
+            tags=self.tags,
+            compute_id=self.compute,
+            properties=self.properties,
+            environment_id=self.environment_id,
+            environment_variables=self.environment_variables,
+            services=self.services,
+            outputs=to_rest_data_outputs(self.outputs),
+            resources=self.resources,
+            task_details=text_ner,
+            identity=self.identity._to_job_rest_object() if self.identity else None,
+            queue_settings=self.queue_settings,
+        )
+
+        result = JobBase(properties=properties)
+        result.name = self.name
+        return result
+
+    @classmethod
+    def _from_rest_object(cls, obj: JobBase) -> "TextNerJob":
+        properties: RestAutoMLJob = obj.properties
+        task_details: RestTextNER = properties.task_details
+        assert isinstance(task_details, RestTextNER)
+        limits = (
+            NlpLimitSettings._from_rest_object(task_details.limit_settings) if task_details.limit_settings else None
+        )
+        featurization = (
+            NlpFeaturizationSettings._from_rest_object(task_details.featurization_settings)
+            if task_details.featurization_settings
+            else None
+        )
+        sweep = NlpSweepSettings._from_rest_object(task_details.sweep_settings) if task_details.sweep_settings else None
+        training_parameters = (
+            NlpFixedParameters._from_rest_object(task_details.fixed_parameters)
+            if task_details.fixed_parameters
+            else None
+        )
+
+        text_ner_job = cls(
+            # ----- job specific params
+            id=obj.id,
+            name=obj.name,
+            description=properties.description,
+            tags=properties.tags,
+            properties=properties.properties,
+            experiment_name=properties.experiment_name,
+            services=properties.services,
+            status=properties.status,
+            creation_context=SystemData._from_rest_object(obj.system_data) if obj.system_data else None,
+            display_name=properties.display_name,
+            compute=properties.compute_id,
+            outputs=from_rest_data_outputs(properties.outputs),
+            resources=properties.resources,
+            # ----- task specific params
+            primary_metric=task_details.primary_metric,
+            log_verbosity=task_details.log_verbosity,
+            target_column_name=task_details.target_column_name,
+            training_data=task_details.training_data,
+            validation_data=task_details.validation_data,
+            limits=limits,
+            sweep=sweep,
+            training_parameters=training_parameters,
+            search_space=cls._get_search_space_from_str(task_details.search_space),
+            featurization=featurization,
+            identity=(
+                _BaseJobIdentityConfiguration._from_rest_object(properties.identity) if properties.identity else None
+            ),
+            queue_settings=properties.queue_settings,
+        )
+
+        text_ner_job._restore_data_inputs()
+
+        return text_ner_job
+
+    def _to_component(self, context: Optional[Dict] = None, **kwargs: Any) -> "Component":
+        raise NotImplementedError()
+
+    @classmethod
+    def _load_from_dict(cls, data: Dict, context: Dict, additional_message: str, **kwargs: Any) -> "TextNerJob":
+        from azure.ai.ml._schema.automl.nlp_vertical.text_ner import TextNerSchema
+
+        if kwargs.pop("inside_pipeline", False):
+            from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextNerNode
+
+            loaded_data = load_from_dict(AutoMLTextNerNode, data, context, additional_message, **kwargs)
+        else:
+            loaded_data = load_from_dict(TextNerSchema, data, context, additional_message, **kwargs)
+        job_instance = cls._create_instance_from_schema_dict(loaded_data)
+        return job_instance
+
+    @classmethod
+    def _create_instance_from_schema_dict(cls, loaded_data: Dict) -> "TextNerJob":
+        loaded_data.pop(AutoMLConstants.TASK_TYPE_YAML, None)
+        return TextNerJob(**loaded_data)
+
+    def _to_dict(self, inside_pipeline: bool = False) -> Dict:
+        from azure.ai.ml._schema.automl.nlp_vertical.text_ner import TextNerSchema
+        from azure.ai.ml._schema.pipeline.automl_node import AutoMLTextNerNode
+
+        if inside_pipeline:
+            res_autoML: dict = AutoMLTextNerNode(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self)
+            return res_autoML
+
+        res: dict = TextNerSchema(context={BASE_PATH_CONTEXT_KEY: "./"}).dump(self)
+        return res
+
+    def __eq__(self, other: object) -> bool:
+        if not isinstance(other, TextNerJob):
+            return NotImplemented
+
+        if not super(TextNerJob, self).__eq__(other):
+            return False
+
+        return self.primary_metric == other.primary_metric
+
+    def __ne__(self, other: object) -> bool:
+        return not self.__eq__(other)
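A companion sketch for the NER entity above, again under assumptions: the experiment name, compute target, and MLTable paths are placeholders, and the automl.text_ner factory and set_limits are the documented public-SDK equivalents of constructing TextNerJob directly. Unlike the classification jobs, no target column is passed, since CoNLL-format NER data fixes the schema.

    from azure.ai.ml import Input, automl
    from azure.ai.ml.constants import AssetTypes

    # Build the NER job via the public factory (returns a TextNerJob).
    text_ner_job = automl.text_ner(
        experiment_name="automl-nlp-ner",  # placeholder experiment name
        compute="gpu-cluster",             # placeholder compute target
        training_data=Input(type=AssetTypes.MLTABLE, path="./train-mltable-folder"),
        validation_data=Input(type=AssetTypes.MLTABLE, path="./valid-mltable-folder"),
    )
    # Cap the run; submitting the job would exercise _to_rest_object() above,
    # and fetching it back would exercise _from_rest_object().
    text_ner_job.set_limits(timeout_minutes=120, max_trials=1)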
