PyPI - trainml - Versions diffs - 0.5.9__py3-none-any.whl → 0.5.12__py3-none-any.whl - Mend

trainml 0.5.9__py3-none-any.whl → 0.5.12__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (64) hide show

tests/integration/projects/conftest.py +3 -1
tests/integration/projects/test_projects_credentials_integration.py +45 -0
tests/integration/projects/test_projects_data_connectors_integration.py +44 -0
tests/integration/projects/test_projects_datastores_integration.py +42 -0
tests/integration/projects/test_projects_secrets_integration.py +1 -1
tests/integration/projects/test_projects_services_integration.py +44 -0
tests/integration/test_checkpoints_integration.py +1 -2
tests/integration/test_models_integration.py +0 -1
tests/unit/cli/projects/__init__.py +0 -0
tests/unit/cli/projects/test_cli_project_credential_unit.py +26 -0
tests/unit/cli/projects/test_cli_project_data_connector_unit.py +28 -0
tests/unit/cli/projects/test_cli_project_datastore_unit.py +26 -0
tests/unit/cli/projects/test_cli_project_key_unit.py +26 -0
tests/unit/cli/projects/test_cli_project_secret_unit.py +26 -0
tests/unit/cli/projects/test_cli_project_service_unit.py +26 -0
tests/unit/cli/projects/test_cli_project_unit.py +19 -0
tests/unit/cloudbender/test_datastores_unit.py +1 -5
tests/unit/conftest.py +79 -6
tests/unit/projects/test_project_credentials_unit.py +100 -0
tests/unit/projects/test_projects_unit.py +1 -1
tests/unit/test_checkpoints_unit.py +15 -23
tests/unit/test_datasets_unit.py +15 -20
tests/unit/test_models_unit.py +13 -16
tests/unit/test_volumes_unit.py +3 -0
trainml/__init__.py +1 -1
trainml/checkpoints.py +14 -3
trainml/cli/cloudbender/datastore.py +2 -7
trainml/cli/job/create.py +16 -16
trainml/cli/project/__init__.py +4 -73
trainml/cli/project/credential.py +128 -0
trainml/cli/project/data_connector.py +61 -0
trainml/cli/project/datastore.py +61 -0
trainml/cli/project/secret.py +12 -3
trainml/cli/project/service.py +61 -0
trainml/cloudbender/data_connectors.py +8 -0
trainml/cloudbender/datastores.py +9 -19
trainml/cloudbender/nodes.py +44 -1
trainml/cloudbender/providers.py +53 -0
trainml/cloudbender/regions.py +48 -0
trainml/datasets.py +14 -3
trainml/exceptions.py +51 -0
trainml/jobs.py +2 -13
trainml/models.py +14 -3
trainml/projects/credentials.py +71 -0
trainml/projects/projects.py +7 -4
trainml/projects/secrets.py +1 -1
trainml/volumes.py +15 -3
{trainml-0.5.9.dist-info → trainml-0.5.12.dist-info}/METADATA +1 -1
{trainml-0.5.9.dist-info → trainml-0.5.12.dist-info}/RECORD +53 -46
tests/integration/test_projects_integration.py +0 -44
tests/unit/cli/cloudbender/test_cli_reservation_unit.py +0 -34
tests/unit/cli/test_cli_project_unit.py +0 -42
tests/unit/cloudbender/test_reservations_unit.py +0 -173
tests/unit/test_auth.py +0 -30
tests/unit/test_projects_unit.py +0 -320
tests/unit/test_trainml.py +0 -54
trainml/cli/cloudbender/reservation.py +0 -159
trainml/cli/project.py +0 -149
trainml/cloudbender/reservations.py +0 -126
trainml/projects.py +0 -228
{trainml-0.5.9.dist-info → trainml-0.5.12.dist-info}/LICENSE +0 -0
{trainml-0.5.9.dist-info → trainml-0.5.12.dist-info}/WHEEL +0 -0
{trainml-0.5.9.dist-info → trainml-0.5.12.dist-info}/entry_points.txt +0 -0
{trainml-0.5.9.dist-info → trainml-0.5.12.dist-info}/top_level.txt +0 -0

trainml/cli/project/credential.py ADDED Viewed

@@ -0,0 +1,128 @@
+import click
+import os
+import json
+import base64
+from pathlib import Path
+from trainml.cli import pass_config
+from trainml.cli.project import project
+@project.group()
+@pass_config
+def credential(config):
+    """trainML project credential commands."""
+    # Container group only: the commands registered on it below do the work.
+@credential.command()
+@pass_config
+def list(config):
+    """List credentials."""
+    header = ["TYPE", "KEY ID", "UPDATED AT"]
+    table = [header, ["-" * 80 for _ in header]]
+    # Resolve the current project, then read the credentials stored on it.
+    current = config.trainml.run(config.trainml.client.projects.get_current())
+    stored = config.trainml.run(current.credentials.list())
+    table += [
+        [item.type, item.key_id, item.updated_at.isoformat(timespec="seconds")]
+        for item in stored
+    ]
+    # Every cell is right-aligned; the precision in each spec truncates long
+    # values (including the 80-character divider row) to the column width.
+    line_template = "{: >13.11} {: >37.35} {: >28.26}"
+    for line in table:
+        click.echo(line_template.format(*line), file=config.stdout)
+@credential.command()
+@click.argument(
+    "type",
+    type=click.Choice(
+        [
+            "aws",
+            "azure",
+            "docker",
+            "gcp",
+            "huggingface",
+            "kaggle",
+            "ngc",
+            "wasabi",
+        ],
+        case_sensitive=False,
+    ),
+)
+@pass_config
+def put(config, type):
+    """
+    Set a credential.
+    A credential is uploaded.
+    """
+    # TYPE decides what is collected: an id/secret pair prompted from the
+    # user, or (gcp, kaggle) the contents of a JSON credentials file.
+    project = config.trainml.run(config.trainml.client.projects.get_current())
+    # Only the azure branch sets a tenant; every other type sends None.
+    tenant = None
+    if type in ("aws", "wasabi"):
+        credential_id = click.prompt(
+            "Enter the credential ID", type=str, hide_input=False
+        )
+        secret = click.prompt("Enter the secret credential", type=str, hide_input=True)
+    elif type == "azure":
+        credential_id = click.prompt(
+            "Enter the Application (client) ID", type=str, hide_input=False
+        )
+        # Prompt text previously read "(tenant) ley", a typo for "ID".
+        tenant = click.prompt(
+            "Enter the Directory (tenant) ID", type=str, hide_input=False
+        )
+        secret = click.prompt("Enter the client secret", type=str, hide_input=True)
+    elif type in ("docker", "huggingface"):
+        credential_id = click.prompt("Enter the username", type=str, hide_input=False)
+        secret = click.prompt("Enter the access token", type=str, hide_input=True)
+    elif type in ("gcp", "kaggle"):
+        file_name = click.prompt(
+            "Enter the path of the credentials file",
+            type=click.Path(
+                exists=True, file_okay=True, dir_okay=False, resolve_path=True
+            ),
+            hide_input=False,
+        )
+        # The file's base name is used as the credential id.
+        credential_id = os.path.basename(file_name)
+        # Read as UTF-8 explicitly so the result does not depend on the
+        # platform's default encoding; parsing then re-serializing rejects
+        # files that are not valid JSON before anything is uploaded.
+        with open(file_name, encoding="utf-8") as f:
+            secret = json.dumps(json.load(f))
+    elif type == "ngc":
+        # ngc uses a fixed id; only the token is prompted for.
+        credential_id = "$oauthtoken"
+        secret = click.prompt("Enter the access token", type=str, hide_input=True)
+    else:
+        raise click.UsageError("Unsupported credential type")
+    return config.trainml.run(
+        project.credentials.put(
+            type=type, credential_id=credential_id, secret=secret, tenant=tenant
+        )
+    )
+@credential.command()
+@click.argument("name", type=click.STRING)
+@pass_config
+def remove(config, name):
+    """
+    Remove a credential.
+    """
+    project = config.trainml.run(config.trainml.client.projects.get_current())
+    # Go through the project's `credentials` collection, the same attribute
+    # the list and put commands in this module use (was `project.credential`).
+    return config.trainml.run(project.credentials.remove(name))

trainml/cli/project/data_connector.py ADDED Viewed

@@ -0,0 +1,61 @@
+import click
+import os
+import json
+import base64
+from pathlib import Path
+from trainml.cli import pass_config
+from trainml.cli.project import project
+@project.group()
+@pass_config
+def data_connector(config):
+    """trainML project data_connector commands."""
+    # Container group only: the commands registered on it below do the work.
+@data_connector.command()
+@pass_config
+def list(config):
+    """List project data_connectors."""
+    columns = ["ID", "NAME", "TYPE", "REGION_UUID"]
+    table = [columns, ["-" * 80 for _ in columns]]
+    # Look the project up by the identifier held on the client.
+    active_project = config.trainml.run(
+        config.trainml.client.projects.get(config.trainml.client.project)
+    )
+    connectors = config.trainml.run(active_project.data_connectors.list())
+    table += [
+        [entry.id, entry.name, entry.type, entry.region_uuid]
+        for entry in connectors
+    ]
+    # Right-aligned columns; the precision in each spec truncates long values.
+    line_template = "{: >38.36} {: >30.28} {: >15.13} {: >38.36}"
+    for line in table:
+        click.echo(line_template.format(*line), file=config.stdout)
+@data_connector.command()
+@pass_config
+def refresh(config):
+    """
+    Refresh project data_connector list.
+    """
+    runner = config.trainml
+    # Resolve the current project first, then run the refresh call on its
+    # data connector collection and hand back whatever that returns.
+    current = runner.run(runner.client.projects.get_current())
+    return runner.run(current.data_connectors.refresh())

trainml/cli/project/datastore.py ADDED Viewed

@@ -0,0 +1,61 @@
+import click
+import os
+import json
+import base64
+from pathlib import Path
+from trainml.cli import pass_config
+from trainml.cli.project import project
+@project.group()
+@pass_config
+def datastore(config):
+    """trainML project datastore commands."""
+    # Container group only: the commands registered on it below do the work.
+@datastore.command()
+@pass_config
+def list(config):
+    """List project datastores."""
+    titles = ["ID", "NAME", "TYPE", "REGION_UUID"]
+    rows = [titles, ["-" * 80] * len(titles)]
+    # Look the project up by the identifier held on the client.
+    owner = config.trainml.run(
+        config.trainml.client.projects.get(config.trainml.client.project)
+    )
+    for store in config.trainml.run(owner.datastores.list()):
+        rows.append([store.id, store.name, store.type, store.region_uuid])
+    # Right-aligned columns; the precision in each spec truncates long values.
+    template = "{: >38.36} {: >30.28} {: >15.13} {: >38.36}"
+    for cells in rows:
+        click.echo(template.format(*cells), file=config.stdout)
+@datastore.command()
+@pass_config
+def refresh(config):
+    """
+    Refresh project datastore list.
+    """
+    runner = config.trainml
+    # Resolve the current project first, then run the refresh call on its
+    # datastore collection and hand back whatever that returns.
+    current = runner.run(runner.client.projects.get_current())
+    return runner.run(current.datastores.refresh())

trainml/cli/project/secret.py CHANGED Viewed

@@ -1,4 +1,5 @@
 import click
+import os
 from trainml.cli import pass_config
 from trainml.cli.project import project
@@ -42,17 +43,25 @@ def list(config):
 @secret.command()
+@click.option(
+    "--file",
+    type=click.Path(exists=True, file_okay=True, dir_okay=False, resolve_path=True),
+    help="Load the secret value from the file at the provided path",
+)
 @click.argument("name", type=click.STRING)
 @pass_config
-def put(config, name):
+def put(config, file, name):
     """
     Set a secret value.
     Secret is created with the specified NAME.
     """
     project = config.trainml.run(config.trainml.client.projects.get_current())
-    value = click.prompt("Enter the secret value", type=str, hide_input=True)
+    if file:
+        with open(os.path.expanduser(file)) as f:
+            value = f.read()
+    else:
+        value = click.prompt("Enter the secret value", type=str, hide_input=True)
     return config.trainml.run(project.secrets.put(name=name, value=value))

trainml/cli/project/service.py ADDED Viewed

@@ -0,0 +1,61 @@
+import click
+import os
+import json
+import base64
+from pathlib import Path
+from trainml.cli import pass_config
+from trainml.cli.project import project
+@project.group()
+@pass_config
+def service(config):
+    """trainML project service commands."""
+    # Container group only: the commands registered on it below do the work.
+@service.command()
+@pass_config
+def list(config):
+    """List project services."""
+    # The third column prints each service's hostname, so it is labelled
+    # HOSTNAME (the header previously said TYPE, which no row contains).
+    data = [
+        ["ID", "NAME", "HOSTNAME", "REGION_UUID"],
+        [
+            "-" * 80,
+            "-" * 80,
+            "-" * 80,
+            "-" * 80,
+        ],
+    ]
+    # Look the project up by the identifier held on the client.
+    project = config.trainml.run(
+        config.trainml.client.projects.get(config.trainml.client.project)
+    )
+    services = config.trainml.run(project.services.list())
+    data.extend(
+        [service.id, service.name, service.hostname, service.region_uuid]
+        for service in services
+    )
+    # Right-aligned columns; the precision in each spec truncates long values
+    # (hostnames longer than 13 characters are cut to fit the column).
+    row_format = "{: >38.36} {: >30.28} {: >15.13} {: >38.36}"
+    for row in data:
+        click.echo(row_format.format(*row), file=config.stdout)
+@service.command()
+@pass_config
+def refresh(config):
+    """
+    Refresh project service list.
+    """
+    runner = config.trainml
+    # Resolve the current project first, then run the refresh call on its
+    # service collection and hand back whatever that returns.
+    current = runner.run(runner.client.projects.get_current())
+    return runner.run(current.services.refresh())

trainml/cloudbender/data_connectors.py CHANGED Viewed

@@ -1,5 +1,13 @@
 import json
 import logging
+import asyncio
+import math
+from trainml.exceptions import (
+    ApiError,
+    SpecificationError,
+    TrainMLException,
+)
 class DataConnectors(object):

trainml/cloudbender/datastores.py CHANGED Viewed

@@ -1,5 +1,13 @@
 import json
 import logging
+import asyncio
+import math
+from trainml.exceptions import (
+    ApiError,
+    SpecificationError,
+    TrainMLException,
+)
 class Datastores(object):
@@ -20,9 +28,7 @@ class Datastores(object):
             "GET",
             kwargs,
         )
-        datastores = [
-            Datastore(self.trainml, **datastore) for datastore in resp
-        ]
+        datastores = [Datastore(self.trainml, **datastore) for datastore in resp]
         return datastores
     async def create(
@@ -31,18 +37,12 @@ class Datastores(object):
         region_uuid,
         name,
         type,
-        uri,
-        root,
-        options=None,
         **kwargs,
     ):
         logging.info(f"Creating Datastore {name}")
         data = dict(
             name=name,
             type=type,
-            uri=uri,
-            root=root,
-            options=options,
             **kwargs,
         )
         payload = {k: v for k, v in data.items() if v is not None}
@@ -73,8 +73,6 @@ class Datastore:
         self._region_uuid = self._datastore.get("region_uuid")
         self._type = self._datastore.get("type")
         self._name = self._datastore.get("name")
-        self._uri = self._datastore.get("uri")
-        self._root = self._datastore.get("root")
     @property
     def id(self) -> str:
@@ -96,14 +94,6 @@ class Datastore:
     def name(self) -> str:
         return self._name
-    @property
-    def uri(self) -> str:
-        return self._uri
-    @property
-    def root(self) -> str:
-        return self._root
     def __str__(self):
         return json.dumps({k: v for k, v in self._datastore.items()})

trainml/cloudbender/nodes.py CHANGED Viewed

@@ -1,5 +1,9 @@
 import json
 import logging
+import asyncio
+import math
+from trainml.exceptions import ApiError, SpecificationError, TrainMLException, NodeError
 class Nodes(object):
@@ -29,7 +33,7 @@ class Nodes(object):
         region_uuid,
         friendly_name,
         hostname,
-        minion_id,
+        minion_id=None,
         type="permanent",
         service="compute",
         **kwargs,
@@ -153,3 +157,42 @@ class Node:
             None,
             dict(command=command),
         )
+    async def wait_for(self, status, timeout=300):
+        """Poll this node until it reports ``status`` or the timeout expires.
+        Args:
+            status: Target status; one of "active", "maintenance", "offline",
+                "stopped" or "archived".
+            timeout: Maximum time to wait in seconds; may not exceed 24 hours.
+        Returns:
+            This node once its status equals ``status`` (immediately if it
+            already does), or None when waiting for "archived" and the
+            refresh fails with a 404.
+        Raises:
+            SpecificationError: ``status`` is not a valid target or
+                ``timeout`` is above the maximum.
+            NodeError: the node reports "errored" or "failed" while waiting.
+            ApiError: any other API failure raised by ``refresh()``.
+            TrainMLException: the status was not reached within ``timeout``.
+        """
+        # Already there: return the node, as the polling path does on success.
+        if self.status == status:
+            return self
+        valid_statuses = ["active", "maintenance", "offline", "stopped", "archived"]
+        if status not in valid_statuses:
+            raise SpecificationError(
+                "status",
+                f"Invalid wait_for status {status}.  Valid statuses are: {valid_statuses}",
+            )
+        MAX_TIMEOUT = 24 * 60 * 60
+        if timeout > MAX_TIMEOUT:
+            raise SpecificationError(
+                "timeout",
+                f"timeout must be less than {MAX_TIMEOUT} seconds.",
+            )
+        POLL_INTERVAL_MIN = 5
+        POLL_INTERVAL_MAX = 60
+        # Aim for about 60 polls over the timeout, clamped to 5..60 seconds.
+        POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
+        retry_count = math.ceil(timeout / POLL_INTERVAL)
+        count = 0
+        while count < retry_count:
+            # Sleep first: the current status was already checked above.
+            await asyncio.sleep(POLL_INTERVAL)
+            try:
+                await self.refresh()
+            except ApiError as e:
+                # A 404 while waiting for "archived" is treated as success.
+                if status == "archived" and e.status == 404:
+                    return
+                raise
+            if self.status in ["errored", "failed"]:
+                raise NodeError(self.status, self)
+            if self.status == status:
+                return self
+            count += 1
+            logging.debug(f"self: {self}, retry count {count}")
+        raise TrainMLException(f"Timeout waiting for {status}")

trainml/cloudbender/providers.py CHANGED Viewed

@@ -1,7 +1,16 @@
 import json
 import logging
+import asyncio
+import math
 from datetime import datetime
+from trainml.exceptions import (
+    ApiError,
+    SpecificationError,
+    TrainMLException,
+    ProviderError,
+)
 class Providers(object):
     def __init__(self, trainml):
@@ -36,6 +45,7 @@ class Provider:
         self._provider = kwargs
         self._id = self._provider.get("provider_uuid")
         self._type = self._provider.get("type")
+        self._status = self._provider.get("status")
         self._credits = self._provider.get("credits")
     @property
@@ -46,6 +56,10 @@ class Provider:
     def type(self) -> str:
         return self._type
+    @property
+    def status(self) -> str:
+        return self._status
     @property
     def credits(self) -> float:
         return self._credits
@@ -69,3 +83,42 @@ class Provider:
         )
         self.__init__(self.trainml, **resp)
         return self
+    async def wait_for(self, status, timeout=300):
+        """Poll this provider until it reports ``status`` or the timeout expires.
+        Args:
+            status: Target status; one of "ready" or "archived".
+            timeout: Maximum time to wait in seconds; may not exceed 24 hours.
+        Returns:
+            This provider once its status equals ``status`` (immediately if
+            it already does), or None when waiting for "archived" and the
+            refresh fails with a 404.
+        Raises:
+            SpecificationError: ``status`` is not a valid target or
+                ``timeout`` is above the maximum.
+            ProviderError: the provider reports "errored" or "failed" while
+                waiting.
+            ApiError: any other API failure raised by ``refresh()``.
+            TrainMLException: the status was not reached within ``timeout``.
+        """
+        # Already there: return the provider, as the polling path does on success.
+        if self.status == status:
+            return self
+        valid_statuses = ["ready", "archived"]
+        if status not in valid_statuses:
+            raise SpecificationError(
+                "status",
+                f"Invalid wait_for status {status}.  Valid statuses are: {valid_statuses}",
+            )
+        MAX_TIMEOUT = 24 * 60 * 60
+        if timeout > MAX_TIMEOUT:
+            raise SpecificationError(
+                "timeout",
+                f"timeout must be less than {MAX_TIMEOUT} seconds.",
+            )
+        POLL_INTERVAL_MIN = 5
+        POLL_INTERVAL_MAX = 60
+        # Aim for about 60 polls over the timeout, clamped to 5..60 seconds.
+        POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
+        retry_count = math.ceil(timeout / POLL_INTERVAL)
+        count = 0
+        while count < retry_count:
+            # Sleep first: the current status was already checked above.
+            await asyncio.sleep(POLL_INTERVAL)
+            try:
+                await self.refresh()
+            except ApiError as e:
+                # A 404 while waiting for "archived" is treated as success.
+                if status == "archived" and e.status == 404:
+                    return
+                raise
+            if self.status in ["errored", "failed"]:
+                raise ProviderError(self.status, self)
+            if self.status == status:
+                return self
+            count += 1
+            logging.debug(f"self: {self}, retry count {count}")
+        raise TrainMLException(f"Timeout waiting for {status}")

trainml/cloudbender/regions.py CHANGED Viewed

@@ -1,5 +1,14 @@
 import json
 import logging
+import asyncio
+import math
+from trainml.exceptions import (
+    ApiError,
+    SpecificationError,
+    TrainMLException,
+    RegionError,
+)
 class Regions(object):
@@ -111,3 +120,42 @@ class Region:
             None,
             dict(project_uuid=project_uuid, checkpoint_uuid=checkpoint_uuid),
         )
+    async def wait_for(self, status, timeout=300):
+        """Poll this region until it reports ``status`` or the timeout expires.
+        Args:
+            status: Target status; one of "healthy", "offline" or "archived".
+            timeout: Maximum time to wait in seconds; may not exceed 24 hours.
+        Returns:
+            This region once its status equals ``status`` (immediately if it
+            already does), or None when waiting for "archived" and the
+            refresh fails with a 404.
+        Raises:
+            SpecificationError: ``status`` is not a valid target or
+                ``timeout`` is above the maximum.
+            RegionError: the region reports "errored" or "failed" while
+                waiting.
+            ApiError: any other API failure raised by ``refresh()``.
+            TrainMLException: the status was not reached within ``timeout``.
+        """
+        # Already there: return the region, as the polling path does on success.
+        if self.status == status:
+            return self
+        valid_statuses = ["healthy", "offline", "archived"]
+        if status not in valid_statuses:
+            raise SpecificationError(
+                "status",
+                f"Invalid wait_for status {status}.  Valid statuses are: {valid_statuses}",
+            )
+        MAX_TIMEOUT = 24 * 60 * 60
+        if timeout > MAX_TIMEOUT:
+            raise SpecificationError(
+                "timeout",
+                f"timeout must be less than {MAX_TIMEOUT} seconds.",
+            )
+        POLL_INTERVAL_MIN = 5
+        POLL_INTERVAL_MAX = 60
+        # Aim for about 60 polls over the timeout, clamped to 5..60 seconds.
+        POLL_INTERVAL = max(min(timeout / 60, POLL_INTERVAL_MAX), POLL_INTERVAL_MIN)
+        retry_count = math.ceil(timeout / POLL_INTERVAL)
+        count = 0
+        while count < retry_count:
+            # Sleep first: the current status was already checked above.
+            await asyncio.sleep(POLL_INTERVAL)
+            try:
+                await self.refresh()
+            except ApiError as e:
+                # A 404 while waiting for "archived" is treated as success.
+                if status == "archived" and e.status == 404:
+                    return
+                raise
+            if self.status in ["errored", "failed"]:
+                raise RegionError(self.status, self)
+            if self.status == status:
+                return self
+            count += 1
+            logging.debug(f"self: {self}, retry count {count}")
+        raise TrainMLException(f"Timeout waiting for {status}")

trainml/datasets.py CHANGED Viewed

@@ -31,13 +31,24 @@ class Datasets(object):
         datasets = [Dataset(self.trainml, **dataset) for dataset in resp]
         return datasets
-    async def create(self, name, source_type, source_uri, **kwargs):
+    async def create(
+        self,
+        name,
+        source_type,
+        source_uri,
+        type="evefs",
+        project_uuid=None,
+        **kwargs,
+    ):
+        if not project_uuid:
+            project_uuid = self.trainml.active_project
         data = dict(
             name=name,
             source_type=source_type,
             source_uri=source_uri,
-            source_options=kwargs.get("source_options"),
-            project_uuid=kwargs.get("project_uuid") or self.trainml.active_project,
+            project_uuid=project_uuid,
+            type=type,
+            **kwargs,
         )
         payload = {k: v for k, v in data.items() if v is not None}
         logging.info(f"Creating Dataset {name}")

trainml 0.5.9__py3-none-any.whl → 0.5.12__py3-none-any.whl

trainml 0.5.9__py3-none-any.whl → 0.5.12__py3-none-any.whl