diff options
Diffstat (limited to '.venv/lib/python3.12/site-packages/azure/ai/ml/_internal/entities/runsettings')
4 files changed, 407 insertions, 0 deletions
# ---------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# ---------------------------------------------------------

"""Public surface of the internal ``runsettings`` entities package.

Re-exports the AI SuperComputer, ITP and target-selector run-setting
entity classes so callers can import them from one place.
"""

from .ai_super_computer_configuration import (
    AISuperComputerConfiguration,
    AISuperComputerScalePolicy,
    AISuperComputerStorageReferenceConfiguration,
)
from .itp_configuration import (
    ITPConfiguration,
    ITPInteractiveConfiguration,
    ITPPriorityConfiguration,
    ITPResourceConfiguration,
    ITPRetrySettings,
)
from .target_selector import TargetSelector

__all__ = [
    "ITPInteractiveConfiguration",
    "ITPPriorityConfiguration",
    "ITPResourceConfiguration",
    "ITPRetrySettings",
    "ITPConfiguration",
    "TargetSelector",
    "AISuperComputerConfiguration",
    "AISuperComputerScalePolicy",
    "AISuperComputerStorageReferenceConfiguration",
]
# ---------------------------------------------------------

# NOTE(review): this span of the diff dump covers three generated modules
# (ai_super_computer_configuration.py, itp_configuration.py and
# target_selector.py); section comments below mark the original file
# boundaries. Imports are hoisted to one header for this reconstruction.

from typing import Any, Dict, List, Optional

from ....entities._job.job_resource_configuration import BaseProperty


# --- ai_super_computer_configuration.py -------------------------------------


class PascalCaseProperty(BaseProperty):
    """Property bag whose known keys are emitted in PascalCase.

    Subclasses declare ``_KEY_MAPPING`` (lower-cased attribute name mapped to
    the PascalCase wire name); :meth:`items` rewrites matching keys on the
    way out and passes unknown keys through unchanged.
    """

    _KEY_MAPPING: Dict[str, Any] = {}

    def items(self):
        """Return ``(key, value)`` pairs with mapped keys renamed to PascalCase."""
        mapping = self._KEY_MAPPING
        return [(mapping.get(key.lower(), key), value) for key, value in super().items()]


class AISuperComputerStorageReferenceConfiguration(PascalCaseProperty):  # pylint: disable=name-too-long
    """Reference to a storage container consumed by an AI SuperComputer job."""

    _KEY_MAPPING = {
        "container_name": "ContainerName",
        "relative_path": "RelativePath",
    }

    def __init__(
        self,
        container_name: str,
        relative_path: str,
        **kwargs,
    ):
        """
        :param container_name: The name of the ai-super-computer storage container.
        :type container_name: str
        :param relative_path: The path on the ai-super-computer storage container.
        :type relative_path: str
        """
        super().__init__(**kwargs)
        self.container_name = container_name
        self.relative_path = relative_path


class AISuperComputerScalePolicy(PascalCaseProperty):
    """Elasticity (auto-scale) options for an AI SuperComputer job."""

    _KEY_MAPPING = {
        "auto_scale_instance_type_count_set": "AutoScaleInstanceTypeCountSet",
        "auto_scale_interval_in_sec": "AutoScaleIntervalInSec",
        "max_instance_type_count": "MaxInstanceTypeCount",
        "min_instance_type_count": "MinInstanceTypeCount",
    }

    def __init__(
        self,
        auto_scale_instance_type_count_set: Optional[List[int]] = None,
        auto_scale_interval_in_sec: Optional[int] = None,
        max_instance_type_count: Optional[int] = None,
        min_instance_type_count: Optional[int] = None,
        **kwargs,
    ):
        """
        :param auto_scale_instance_type_count_set: The list of instance type counts
            available for elastically scaling the job, as integers in ascending
            order. E.g. with currentInstanceTypeCount = 4 and
            autoScaleInstanceTypeCountSet = [2,4,8], the job scales down
            8->4->2 when capacity shrinks and up 2->4->8 when it grows.
        :type auto_scale_instance_type_count_set: List[int]
        :param auto_scale_interval_in_sec: The minimum interval in seconds between
            job autoscaling operations. Recommended to be longer than the
            checkpoint interval so at least one checkpoint lands between scales.
        :type auto_scale_interval_in_sec: int
        :param max_instance_type_count: The maximum instance type count.
        :type max_instance_type_count: int
        :param min_instance_type_count: The minimum instance type count.
        :type min_instance_type_count: int
        """
        super().__init__(**kwargs)
        self.auto_scale_instance_type_count_set = auto_scale_instance_type_count_set
        self.auto_scale_interval_in_sec = auto_scale_interval_in_sec
        self.max_instance_type_count = max_instance_type_count
        self.min_instance_type_count = min_instance_type_count


class AISuperComputerConfiguration(PascalCaseProperty):  # pylint: disable=too-many-instance-attributes
    """A class to manage AI Super Computer Configuration."""

    _KEY_MAPPING = {
        "instance_type": "InstanceType",
        "instance_types": "InstanceTypes",
        "image_version": "ImageVersion",
        "location": "Location",
        "locations": "Locations",
        "ai_super_computer_storage_data": "AISuperComputerStorageData",
        "interactive": "Interactive",
        "scale_policy": "ScalePolicy",
        "virtual_cluster_arm_id": "VirtualClusterArmId",
        "tensorboard_log_directory": "TensorboardLogDirectory",
        "ssh_public_key": "SSHPublicKey",
        "ssh_public_keys": "SSHPublicKeys",
        "enable_azml_int": "EnableAzmlInt",
        "priority": "Priority",
        "sla_tier": "SLATier",
        "suspend_on_idle_time_hours": "SuspendOnIdleTimeHours",
        "user_alias": "UserAlias",
    }

    def __init__(
        self,
        instance_type: Optional[str] = None,
        instance_types: Optional[List[str]] = None,
        image_version: Optional[str] = None,
        location: Optional[str] = None,
        locations: Optional[List[str]] = None,
        ai_super_computer_storage_data: Optional[Dict[str, AISuperComputerStorageReferenceConfiguration]] = None,
        interactive: Optional[bool] = None,
        scale_policy: Optional[AISuperComputerScalePolicy] = None,
        virtual_cluster_arm_id: Optional[str] = None,
        tensorboard_log_directory: Optional[str] = None,
        ssh_public_key: Optional[str] = None,
        ssh_public_keys: Optional[List[str]] = None,
        enable_azml_int: Optional[bool] = None,
        priority: Optional[str] = None,
        sla_tier: Optional[str] = None,
        suspend_on_idle_time_hours: Optional[int] = None,
        user_alias: Optional[str] = None,
        **kwargs,
    ):
        """
        :param instance_type: The class of compute to be used. The list of instance
            types is available in
            https://singularitydocs.azurewebsites.net/docs/overview/instance_types/
        :type instance_type: str
        :param instance_types: Same as ``instance_type``, as a list of candidates.
        :type instance_types: List[str]
        :param image_version: The image to use in ai-super-computer. Only a limited
            set of predefined images is supported.
        :type image_version: str
        :param location: The location (region) where the job will run. The
            workspace region is used if neither location nor locations is given.
        :type location: str
        :param locations: Same as ``location``, as a list of candidate regions.
        :type locations: List[str]
        :param ai_super_computer_storage_data: All of the AI SuperComputer storage
            data sources to be made available to the run.
        :type ai_super_computer_storage_data: Dict[str, AISuperComputerStorageReferenceConfiguration]
        :param interactive: Whether the job should be interactive. Interactive jobs
            start the requested nodes but do not run a command.
        :type interactive: bool
        :param scale_policy: The elasticity options for the job: scale up when extra
            capacity is available, scale down when resources are called back.
        :type scale_policy: AISuperComputerScalePolicy
        :param virtual_cluster_arm_id: The ARM Resource Id of the Virtual Cluster to
            submit the job to.
        :type virtual_cluster_arm_id: str
        :param tensorboard_log_directory: The directory where the Tensorboard logs
            will be written.
        :type tensorboard_log_directory: str
        :param ssh_public_key: The SSH Public Key to use when enabling SSH access to
            the job. If not specified, username/password auth will be enabled.
        :type ssh_public_key: str
        :param ssh_public_keys: Same as ``ssh_public_key``, as a list of keys.
        :type ssh_public_keys: List[str]
        :param enable_azml_int: Whether the job should include the azml_int utility.
        :type enable_azml_int: bool
        :param priority: The priority of the job. The default value is Medium.
        :type priority: str
        :param sla_tier: The SLA tier of the job. The default value is Standard.
        :type sla_tier: str
        :param suspend_on_idle_time_hours: Minimum idle time before the run gets
            automatically suspended (in hours).
        :type suspend_on_idle_time_hours: int
        :param user_alias: User alias, used for naming mount paths.
        :type user_alias: str
        """
        super().__init__(**kwargs)
        self.instance_type = instance_type
        self.instance_types = instance_types
        self.image_version = image_version
        self.location = location
        self.locations = locations
        self.ai_super_computer_storage_data = ai_super_computer_storage_data
        self.interactive = interactive
        self.scale_policy = scale_policy
        self.virtual_cluster_arm_id = virtual_cluster_arm_id
        self.tensorboard_log_directory = tensorboard_log_directory
        self.ssh_public_key = ssh_public_key
        self.ssh_public_keys = ssh_public_keys
        self.enable_azml_int = enable_azml_int
        self.priority = priority
        self.sla_tier = sla_tier
        self.suspend_on_idle_time_hours = suspend_on_idle_time_hours
        self.user_alias = user_alias


# --- itp_configuration.py ----------------------------------------------------


class ITPResourceConfiguration(BaseProperty):
    """ITP resource configuration."""

    def __init__(
        self,
        gpu_count: Optional[int] = None,
        cpu_count: Optional[int] = None,
        memory_request_in_gb: Optional[int] = None,
        **kwargs
    ):
        """
        :param gpu_count: How many gpu cores a single node gpu job will use.
            Default value is 1.
        :type gpu_count: int
        :param cpu_count: How many cpu cores a single node cpu job will use.
            Default value is 1.
        :type cpu_count: int
        :param memory_request_in_gb: How much GB memory a single node job will
            request. Default value is 0, meaning it is calculated automatically.
        :type memory_request_in_gb: int
        """
        super().__init__(**kwargs)
        self.gpu_count = gpu_count
        self.cpu_count = cpu_count
        self.memory_request_in_gb = memory_request_in_gb


class ITPPriorityConfiguration(BaseProperty):
    """ITP priority configuration."""

    def __init__(
        self,
        job_priority: Optional[int] = None,
        is_preemptible: Optional[bool] = None,
        node_count_set: Optional[List[int]] = None,
        scale_interval: Optional[int] = None,
        **kwargs
    ):
        """
        :param job_priority: The priority of a job. Default value is 200. Users can
            set 100~200; any value outside that range is treated as 200.
        :type job_priority: int
        :param is_preemptible: Whether to preempt extra compute resources beyond
            the VC quota. Default value is false.
        :type is_preemptible: bool
        :param node_count_set: Determines how compute auto-scales nodes; a list of
            integers in ascending order. Only available when IsPreemptible is true.
        :type node_count_set: List[int]
        :param scale_interval: Scale interval in min.
        :type scale_interval: int
        """
        super().__init__(**kwargs)
        self.job_priority = job_priority
        self.is_preemptible = is_preemptible
        self.node_count_set = node_count_set
        self.scale_interval = scale_interval


class ITPInteractiveConfiguration(BaseProperty):
    """ITP interactive configuration."""

    def __init__(
        self,
        is_ssh_enabled: Optional[bool] = None,
        ssh_public_key: Optional[str] = None,
        is_i_python_enabled: Optional[bool] = None,
        is_tensor_board_enabled: Optional[bool] = None,
        interactive_port: Optional[int] = None,
        **kwargs
    ):
        """
        :param is_ssh_enabled: Whether to enable SSH for interactive development.
            Default value is false.
        :type is_ssh_enabled: bool
        :param ssh_public_key: SSH public key.
        :type ssh_public_key: str
        :param is_i_python_enabled: Is iPython enabled.
        :type is_i_python_enabled: bool
        :param is_tensor_board_enabled: Whether to enable TensorBoard. Default
            value is false.
        :type is_tensor_board_enabled: bool
        :param interactive_port: A custom interactive port; available values range
            from 40000 to 49999.
        :type interactive_port: int
        """
        super().__init__(**kwargs)
        self.is_ssh_enabled = is_ssh_enabled
        self.ssh_public_key = ssh_public_key
        self.is_i_python_enabled = is_i_python_enabled
        self.is_tensor_board_enabled = is_tensor_board_enabled
        self.interactive_port = interactive_port


class ITPRetrySettings(BaseProperty):
    """Retry settings for an ITP job (maximum retry count only)."""

    def __init__(self, max_retry_count=None, **kwargs):
        super().__init__(**kwargs)
        # Maximum number of retries; None leaves the service default in place.
        self.max_retry_count = max_retry_count


class ITPConfiguration(BaseProperty):
    """ITP configuration."""

    def __init__(
        self,
        resource_configuration: Optional[ITPResourceConfiguration] = None,
        priority_configuration: Optional[ITPPriorityConfiguration] = None,
        interactive_configuration: Optional[ITPInteractiveConfiguration] = None,
        retry: Optional[ITPRetrySettings] = None,
        **kwargs
    ):
        """
        :param resource_configuration: Resource requirement for the compute.
        :type resource_configuration: ITPResourceConfiguration
        :param priority_configuration: Priority requirement for the compute.
        :type priority_configuration: ITPPriorityConfiguration
        :param interactive_configuration: Interactive configuration when trying to
            access the compute.
        :type interactive_configuration: ITPInteractiveConfiguration
        :param retry: Retry settings for the job.
        :type retry: ITPRetrySettings
        """
        # Each sub-configuration defaults to an empty instance so attribute
        # access never hits None.
        # NOTE(review): unlike the sibling classes, attributes are assigned
        # BEFORE super().__init__ here; order preserved deliberately in case
        # BaseProperty.__init__ inspects them — confirm before reordering.
        self.resource_configuration = resource_configuration or ITPResourceConfiguration()
        self.priority_configuration = priority_configuration or ITPPriorityConfiguration()
        self.interactive_configuration = interactive_configuration or ITPInteractiveConfiguration()
        self.retry = retry or ITPRetrySettings()
        super().__init__(**kwargs)


# --- target_selector.py ------------------------------------------------------


class TargetSelector(BaseProperty):
    """Compute target selector."""

    def __init__(
        self,
        compute_type: str,
        instance_types: Optional[List[str]] = None,
        regions: Optional[List[str]] = None,
        my_resource_only: Optional[bool] = None,
        allow_spot_vm: Optional[bool] = None,
        **kwargs,
    ):
        """
        :param compute_type: Compute type that target selector could route job to.
            Example value: AmlCompute, AmlK8s.
        :type compute_type: str
        :param instance_types: List of instance_type that job could use. If no
            instance_types are specified, all sizes are allowed. Note
            instance_types here only contains VM SKU. Example value:
            ["STANDARD_D2_V2", "ND24rs_v3"]. Note, this field is case sensitive.
        :type instance_types: List[str]
        :param regions: List of regions the job may be submitted to. If no regions
            are specified, all regions are allowed. Example value: ["eastus"].
            Currently it only works for ITP.
        :type regions: List[str]
        :param my_resource_only: Whether the job must be sent to the cluster owned
            by the user. If False, target selector may send the job to a shared
            cluster. Currently it only works for ITP.
        :type my_resource_only: bool
        :param allow_spot_vm: Whether the target selector service may send the job
            to a low priority VM. Currently it only works for ITP.
        :type allow_spot_vm: bool
        """
        super().__init__(**kwargs)
        self.compute_type = compute_type
        self.instance_types = instance_types
        self.regions = regions
        self.my_resource_only = my_resource_only
        self.allow_spot_vm = allow_spot_vm