PyPI - together - Versions diffs - 1.5.17__py3-none-any.whl → 2.0.0a8__py3-none-any.whl - Mend

together 1.5.17-py3-none-any.whl → 2.0.0a8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (205)

together/__init__.py +101 -63
together/_base_client.py +1995 -0
together/_client.py +1033 -0
together/_compat.py +219 -0
together/_constants.py +14 -0
together/_exceptions.py +108 -0
together/_files.py +123 -0
together/_models.py +857 -0
together/_qs.py +150 -0
together/_resource.py +43 -0
together/_response.py +830 -0
together/_streaming.py +370 -0
together/_types.py +260 -0
together/_utils/__init__.py +64 -0
together/_utils/_compat.py +45 -0
together/_utils/_datetime_parse.py +136 -0
together/_utils/_logs.py +25 -0
together/_utils/_proxy.py +65 -0
together/_utils/_reflection.py +42 -0
together/_utils/_resources_proxy.py +24 -0
together/_utils/_streams.py +12 -0
together/_utils/_sync.py +58 -0
together/_utils/_transform.py +457 -0
together/_utils/_typing.py +156 -0
together/_utils/_utils.py +421 -0
together/_version.py +4 -0
together/lib/.keep +4 -0
together/lib/__init__.py +23 -0
together/{cli → lib/cli}/api/endpoints.py +108 -75
together/lib/cli/api/evals.py +588 -0
together/{cli → lib/cli}/api/files.py +20 -17
together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +161 -120
together/lib/cli/api/models.py +140 -0
together/{cli → lib/cli}/api/utils.py +6 -7
together/{cli → lib/cli}/cli.py +16 -24
together/{constants.py → lib/constants.py} +17 -12
together/lib/resources/__init__.py +11 -0
together/lib/resources/files.py +999 -0
together/lib/resources/fine_tuning.py +280 -0
together/lib/resources/models.py +35 -0
together/lib/types/__init__.py +13 -0
together/lib/types/error.py +9 -0
together/lib/types/fine_tuning.py +455 -0
together/{utils → lib/utils}/__init__.py +6 -14
together/{utils → lib/utils}/_log.py +11 -16
together/lib/utils/files.py +628 -0
together/lib/utils/serializer.py +10 -0
together/{utils → lib/utils}/tools.py +19 -55
together/resources/__init__.py +225 -33
together/resources/audio/__init__.py +72 -21
together/resources/audio/audio.py +198 -0
together/resources/audio/speech.py +574 -122
together/resources/audio/transcriptions.py +282 -0
together/resources/audio/translations.py +256 -0
together/resources/audio/voices.py +135 -0
together/resources/batches.py +417 -0
together/resources/chat/__init__.py +30 -21
together/resources/chat/chat.py +102 -0
together/resources/chat/completions.py +1063 -263
together/resources/code_interpreter/__init__.py +33 -0
together/resources/code_interpreter/code_interpreter.py +258 -0
together/resources/code_interpreter/sessions.py +135 -0
together/resources/completions.py +884 -225
together/resources/embeddings.py +172 -68
together/resources/endpoints.py +598 -395
together/resources/evals.py +452 -0
together/resources/files.py +398 -121
together/resources/fine_tuning.py +1033 -0
together/resources/hardware.py +181 -0
together/resources/images.py +256 -108
together/resources/jobs.py +214 -0
together/resources/models.py +238 -90
together/resources/rerank.py +190 -92
together/resources/videos.py +374 -0
together/types/__init__.py +65 -109
together/types/audio/__init__.py +10 -0
together/types/audio/speech_create_params.py +75 -0
together/types/audio/transcription_create_params.py +54 -0
together/types/audio/transcription_create_response.py +111 -0
together/types/audio/translation_create_params.py +40 -0
together/types/audio/translation_create_response.py +70 -0
together/types/audio/voice_list_response.py +23 -0
together/types/audio_speech_stream_chunk.py +16 -0
together/types/autoscaling.py +13 -0
together/types/autoscaling_param.py +15 -0
together/types/batch_create_params.py +24 -0
together/types/batch_create_response.py +14 -0
together/types/batch_job.py +45 -0
together/types/batch_list_response.py +10 -0
together/types/chat/__init__.py +18 -0
together/types/chat/chat_completion.py +60 -0
together/types/chat/chat_completion_chunk.py +61 -0
together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
together/types/chat/chat_completion_structured_message_text_param.py +13 -0
together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
together/types/chat/chat_completion_usage.py +13 -0
together/types/chat/chat_completion_warning.py +9 -0
together/types/chat/completion_create_params.py +329 -0
together/types/code_interpreter/__init__.py +5 -0
together/types/code_interpreter/session_list_response.py +31 -0
together/types/code_interpreter_execute_params.py +45 -0
together/types/completion.py +42 -0
together/types/completion_chunk.py +66 -0
together/types/completion_create_params.py +138 -0
together/types/dedicated_endpoint.py +44 -0
together/types/embedding.py +24 -0
together/types/embedding_create_params.py +31 -0
together/types/endpoint_create_params.py +43 -0
together/types/endpoint_list_avzones_response.py +11 -0
together/types/endpoint_list_params.py +18 -0
together/types/endpoint_list_response.py +41 -0
together/types/endpoint_update_params.py +27 -0
together/types/eval_create_params.py +263 -0
together/types/eval_create_response.py +16 -0
together/types/eval_list_params.py +21 -0
together/types/eval_list_response.py +10 -0
together/types/eval_status_response.py +100 -0
together/types/evaluation_job.py +139 -0
together/types/execute_response.py +108 -0
together/types/file_delete_response.py +13 -0
together/types/file_list.py +12 -0
together/types/file_purpose.py +9 -0
together/types/file_response.py +31 -0
together/types/file_type.py +7 -0
together/types/fine_tuning_cancel_response.py +194 -0
together/types/fine_tuning_content_params.py +24 -0
together/types/fine_tuning_delete_params.py +11 -0
together/types/fine_tuning_delete_response.py +12 -0
together/types/fine_tuning_list_checkpoints_response.py +21 -0
together/types/fine_tuning_list_events_response.py +12 -0
together/types/fine_tuning_list_response.py +199 -0
together/types/finetune_event.py +41 -0
together/types/finetune_event_type.py +33 -0
together/types/finetune_response.py +177 -0
together/types/hardware_list_params.py +16 -0
together/types/hardware_list_response.py +58 -0
together/types/image_data_b64.py +15 -0
together/types/image_data_url.py +15 -0
together/types/image_file.py +23 -0
together/types/image_generate_params.py +85 -0
together/types/job_list_response.py +47 -0
together/types/job_retrieve_response.py +43 -0
together/types/log_probs.py +18 -0
together/types/model_list_response.py +10 -0
together/types/model_object.py +42 -0
together/types/model_upload_params.py +36 -0
together/types/model_upload_response.py +23 -0
together/types/rerank_create_params.py +36 -0
together/types/rerank_create_response.py +36 -0
together/types/tool_choice.py +23 -0
together/types/tool_choice_param.py +23 -0
together/types/tools_param.py +23 -0
together/types/training_method_dpo.py +22 -0
together/types/training_method_sft.py +18 -0
together/types/video_create_params.py +86 -0
together/types/video_job.py +57 -0
together-2.0.0a8.dist-info/METADATA +680 -0
together-2.0.0a8.dist-info/RECORD +164 -0
{together-1.5.17.dist-info → together-2.0.0a8.dist-info}/WHEEL +1 -1
together-2.0.0a8.dist-info/entry_points.txt +2 -0
{together-1.5.17.dist-info → together-2.0.0a8.dist-info/licenses}/LICENSE +1 -1
together/abstract/api_requestor.py +0 -729
together/cli/api/chat.py +0 -276
together/cli/api/completions.py +0 -119
together/cli/api/images.py +0 -93
together/cli/api/models.py +0 -55
together/client.py +0 -176
together/error.py +0 -194
together/filemanager.py +0 -389
together/legacy/__init__.py +0 -0
together/legacy/base.py +0 -27
together/legacy/complete.py +0 -93
together/legacy/embeddings.py +0 -27
together/legacy/files.py +0 -146
together/legacy/finetune.py +0 -177
together/legacy/images.py +0 -27
together/legacy/models.py +0 -44
together/resources/batch.py +0 -136
together/resources/code_interpreter.py +0 -82
together/resources/finetune.py +0 -1064
together/together_response.py +0 -50
together/types/abstract.py +0 -26
together/types/audio_speech.py +0 -110
together/types/batch.py +0 -53
together/types/chat_completions.py +0 -197
together/types/code_interpreter.py +0 -57
together/types/common.py +0 -66
together/types/completions.py +0 -107
together/types/embeddings.py +0 -35
together/types/endpoints.py +0 -123
together/types/error.py +0 -16
together/types/files.py +0 -90
together/types/finetune.py +0 -398
together/types/images.py +0 -44
together/types/models.py +0 -45
together/types/rerank.py +0 -43
together/utils/api_helpers.py +0 -124
together/utils/files.py +0 -425
together/version.py +0 -6
together-1.5.17.dist-info/METADATA +0 -525
together-1.5.17.dist-info/RECORD +0 -69
together-1.5.17.dist-info/entry_points.txt +0 -3
/together/{abstract → lib/cli}/__init__.py +0 -0
/together/{cli → lib/cli/api}/__init__.py +0 -0
/together/{cli/api/__init__.py → py.typed} +0 -0

together/resources/endpoints.py CHANGED Viewed

@@ -1,508 +1,711 @@
-from __future__ import annotations
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
-from typing import Dict, List, Literal, Optional, Union
+from __future__ import annotations
-from together.abstract import api_requestor
-from together.together_response import TogetherResponse
-from together.types import TogetherClient, TogetherRequest
-from together.types.endpoints import DedicatedEndpoint, HardwareWithStatus, ListEndpoint
+from typing import Optional
+from typing_extensions import Literal
+import httpx
+from ..types import endpoint_list_params, endpoint_create_params, endpoint_update_params
+from .._types import Body, Omit, Query, Headers, NoneType, NotGiven, omit, not_given
+from .._utils import maybe_transform, async_maybe_transform
+from .._compat import cached_property
+from .._resource import SyncAPIResource, AsyncAPIResource
+from .._response import (
+    to_raw_response_wrapper,
+    to_streamed_response_wrapper,
+    async_to_raw_response_wrapper,
+    async_to_streamed_response_wrapper,
+)
+from .._base_client import make_request_options
+from ..types.autoscaling_param import AutoscalingParam
+from ..types.dedicated_endpoint import DedicatedEndpoint
+from ..types.endpoint_list_response import EndpointListResponse
+from ..types.endpoint_list_avzones_response import EndpointListAvzonesResponse
+__all__ = ["EndpointsResource", "AsyncEndpointsResource"]
+class EndpointsResource(SyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> EndpointsResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+        For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+        """
+        return EndpointsResourceWithRawResponse(self)
-class Endpoints:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
+    @cached_property
+    def with_streaming_response(self) -> EndpointsResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-    def list(
-        self, type: Optional[Literal["dedicated", "serverless"]] = None
-    ) -> List[ListEndpoint]:
+        For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
         """
-        List all endpoints, can be filtered by type.
+        return EndpointsResourceWithStreamingResponse(self)
+    def create(
+        self,
+        *,
+        autoscaling: AutoscalingParam,
+        hardware: str,
+        model: str,
+        availability_zone: str | Omit = omit,
+        disable_prompt_cache: bool | Omit = omit,
+        disable_speculative_decoding: bool | Omit = omit,
+        display_name: str | Omit = omit,
+        inactive_timeout: Optional[int] | Omit = omit,
+        state: Literal["STARTED", "STOPPED"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> DedicatedEndpoint:
+        """Creates a new dedicated endpoint for serving models.
+        The endpoint will
+        automatically start after creation. You can deploy any supported model on
+        hardware configurations that meet the model's requirements.
         Args:
-            type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
+          autoscaling: Configuration for automatic scaling of the endpoint
-        Returns:
-            List[ListEndpoint]: List of endpoint objects
-        """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
+          hardware: The hardware configuration to use for this endpoint
-        params = {}
-        if type is not None:
-            params["type"] = type
+          model: The model to deploy on this endpoint
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="GET",
-                url="endpoints",
-                params=params,
-            ),
-            stream=False,
-        )
+          availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
-        response.data = response.data["data"]
+          disable_prompt_cache: Whether to disable the prompt cache for this endpoint
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, list)
+          disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
-        return [ListEndpoint(**endpoint) for endpoint in response.data]
+          display_name: A human-readable name for the endpoint
-    def create(
+          inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to null, omit or set to 0 to disable automatic
+              timeout.
+          state: The desired state of the endpoint
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return self._post(
+            "/endpoints",
+            body=maybe_transform(
+                {
+                    "autoscaling": autoscaling,
+                    "hardware": hardware,
+                    "model": model,
+                    "availability_zone": availability_zone,
+                    "disable_prompt_cache": disable_prompt_cache,
+                    "disable_speculative_decoding": disable_speculative_decoding,
+                    "display_name": display_name,
+                    "inactive_timeout": inactive_timeout,
+                    "state": state,
+                },
+                endpoint_create_params.EndpointCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DedicatedEndpoint,
+        )
+    def retrieve(
         self,
+        endpoint_id: str,
         *,
-        model: str,
-        hardware: str,
-        min_replicas: int,
-        max_replicas: int,
-        display_name: Optional[str] = None,
-        disable_prompt_cache: bool = False,
-        disable_speculative_decoding: bool = False,
-        state: Literal["STARTED", "STOPPED"] = "STARTED",
-        inactive_timeout: Optional[int] = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> DedicatedEndpoint:
         """
-        Create a new dedicated endpoint.
+        Retrieves details about a specific endpoint, including its current state,
+        configuration, and scaling settings.
         Args:
-            model (str): The model to deploy on this endpoint
-            hardware (str): The hardware configuration to use for this endpoint
-            min_replicas (int): The minimum number of replicas to maintain
-            max_replicas (int): The maximum number of replicas to scale up to
-            display_name (str, optional): A human-readable name for the endpoint
-            disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
-            disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
-            state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
-            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
-        Returns:
-            DedicatedEndpoint: Object containing endpoint information
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
         """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-        data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
-            "model": model,
-            "hardware": hardware,
-            "autoscaling": {
-                "min_replicas": min_replicas,
-                "max_replicas": max_replicas,
-            },
-            "disable_prompt_cache": disable_prompt_cache,
-            "disable_speculative_decoding": disable_speculative_decoding,
-            "state": state,
-        }
-        if display_name is not None:
-            data["display_name"] = display_name
-        if inactive_timeout is not None:
-            data["inactive_timeout"] = inactive_timeout
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="POST",
-                url="endpoints",
-                params=data,
+        if not endpoint_id:
+            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+        return self._get(
+            f"/endpoints/{endpoint_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            stream=False,
+            cast_to=DedicatedEndpoint,
         )
-        assert isinstance(response, TogetherResponse)
-        return DedicatedEndpoint(**response.data)
+    def update(
+        self,
+        endpoint_id: str,
+        *,
+        autoscaling: AutoscalingParam | Omit = omit,
+        display_name: str | Omit = omit,
+        inactive_timeout: Optional[int] | Omit = omit,
+        state: Literal["STARTED", "STOPPED"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> DedicatedEndpoint:
+        """Updates an existing endpoint's configuration.
-    def get(self, endpoint_id: str) -> DedicatedEndpoint:
-        """
-        Get details of a specific endpoint.
+        You can modify the display name,
+        autoscaling settings, or change the endpoint's state (start/stop).
         Args:
-            endpoint_id (str): ID of the endpoint to retrieve
+          autoscaling: New autoscaling configuration for the endpoint
-        Returns:
-            DedicatedEndpoint: Object containing endpoint information
-        """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
+          display_name: A human-readable name for the endpoint
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="GET",
-                url=f"endpoints/{endpoint_id}",
-            ),
-            stream=False,
-        )
+          inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to 0 to disable automatic timeout.
-        assert isinstance(response, TogetherResponse)
+          state: The desired state of the endpoint
-        return DedicatedEndpoint(**response.data)
+          extra_headers: Send extra headers
-    def delete(self, endpoint_id: str) -> None:
-        """
-        Delete a specific endpoint.
+          extra_query: Add additional query parameters to the request
-        Args:
-            endpoint_id (str): ID of the endpoint to delete
-        """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
+          extra_body: Add additional JSON properties to the request
-        requestor.request(
-            options=TogetherRequest(
-                method="DELETE",
-                url=f"endpoints/{endpoint_id}",
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        if not endpoint_id:
+            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+        return self._patch(
+            f"/endpoints/{endpoint_id}",
+            body=maybe_transform(
+                {
+                    "autoscaling": autoscaling,
+                    "display_name": display_name,
+                    "inactive_timeout": inactive_timeout,
+                    "state": state,
+                },
+                endpoint_update_params.EndpointUpdateParams,
             ),
-            stream=False,
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DedicatedEndpoint,
         )
-    def update(
+    def list(
         self,
-        endpoint_id: str,
         *,
-        min_replicas: Optional[int] = None,
-        max_replicas: Optional[int] = None,
-        state: Optional[Literal["STARTED", "STOPPED"]] = None,
-        display_name: Optional[str] = None,
-        inactive_timeout: Optional[int] = None,
-    ) -> DedicatedEndpoint:
-        """
-        Update an endpoint's configuration.
+        mine: bool | Omit = omit,
+        type: Literal["dedicated", "serverless"] | Omit = omit,
+        usage_type: Literal["on-demand", "reserved"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> EndpointListResponse:
+        """Returns a list of all endpoints associated with your account.
+        You can filter the
+        results by type (dedicated or serverless).
         Args:
-            endpoint_id (str): ID of the endpoint to update
-            min_replicas (int, optional): The minimum number of replicas to maintain
-            max_replicas (int, optional): The maximum number of replicas to scale up to
-            state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
-            display_name (str, optional): A human-readable name for the endpoint
-            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
-        Returns:
-            DedicatedEndpoint: Object containing endpoint information
+          mine: If true, return only endpoints owned by the caller
+          type: Filter endpoints by type
+          usage_type: Filter endpoints by usage type
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
         """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-        data: Dict[str, Union[str, Dict[str, int], int]] = {}
-        if min_replicas is not None or max_replicas is not None:
-            current_min = min_replicas
-            current_max = max_replicas
-            if current_min is None or current_max is None:
-                # Get current values if only one is specified
-                current = self.get(endpoint_id=endpoint_id)
-                current_min = current_min or current.autoscaling.min_replicas
-                current_max = current_max or current.autoscaling.max_replicas
-            data["autoscaling"] = {
-                "min_replicas": current_min,
-                "max_replicas": current_max,
-            }
-        if state is not None:
-            data["state"] = state
-        if display_name is not None:
-            data["display_name"] = display_name
-        if inactive_timeout is not None:
-            data["inactive_timeout"] = inactive_timeout
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="PATCH",
-                url=f"endpoints/{endpoint_id}",
-                params=data,
+        return self._get(
+            "/endpoints",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=maybe_transform(
+                    {
+                        "mine": mine,
+                        "type": type,
+                        "usage_type": usage_type,
+                    },
+                    endpoint_list_params.EndpointListParams,
+                ),
             ),
-            stream=False,
+            cast_to=EndpointListResponse,
         )
-        assert isinstance(response, TogetherResponse)
-        return DedicatedEndpoint(**response.data)
+    def delete(
+        self,
+        endpoint_id: str,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> None:
+        """Permanently deletes an endpoint.
-    def list_hardware(self, model: Optional[str] = None) -> List[HardwareWithStatus]:
-        """
-        List available hardware configurations.
+        This action cannot be undone.
         Args:
-            model (str, optional): Filter hardware configurations by model compatibility. When provided,
-                                 the response includes availability status for each compatible configuration.
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
-        Returns:
-            List[HardwareWithStatus]: List of hardware configurations with their status
+          timeout: Override the client-level default timeout for this request, in seconds
         """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
+        if not endpoint_id:
+            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return self._delete(
+            f"/endpoints/{endpoint_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=NoneType,
         )
-        params = {}
-        if model is not None:
-            params["model"] = model
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="GET",
-                url="hardware",
-                params=params,
+    def list_avzones(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> EndpointListAvzonesResponse:
+        """List all available availability zones."""
+        return self._get(
+            "/clusters/availability-zones",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            stream=False,
+            cast_to=EndpointListAvzonesResponse,
         )
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, dict)
-        assert isinstance(response.data["data"], list)
-        return [HardwareWithStatus(**item) for item in response.data["data"]]
+class AsyncEndpointsResource(AsyncAPIResource):
+    @cached_property
+    def with_raw_response(self) -> AsyncEndpointsResourceWithRawResponse:
+        """
+        This property can be used as a prefix for any HTTP method call to return
+        the raw response object instead of the parsed content.
+        For more information, see https://www.github.com/togethercomputer/together-py#accessing-raw-response-data-eg-headers
+        """
+        return AsyncEndpointsResourceWithRawResponse(self)
-class AsyncEndpoints:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
+    @cached_property
+    def with_streaming_response(self) -> AsyncEndpointsResourceWithStreamingResponse:
+        """
+        An alternative to `.with_raw_response` that doesn't eagerly read the response body.
-    async def list(
-        self, type: Optional[Literal["dedicated", "serverless"]] = None
-    ) -> List[ListEndpoint]:
+        For more information, see https://www.github.com/togethercomputer/together-py#with_streaming_response
         """
-        List all endpoints, can be filtered by type.
+        return AsyncEndpointsResourceWithStreamingResponse(self)
+    async def create(
+        self,
+        *,
+        autoscaling: AutoscalingParam,
+        hardware: str,
+        model: str,
+        availability_zone: str | Omit = omit,
+        disable_prompt_cache: bool | Omit = omit,
+        disable_speculative_decoding: bool | Omit = omit,
+        display_name: str | Omit = omit,
+        inactive_timeout: Optional[int] | Omit = omit,
+        state: Literal["STARTED", "STOPPED"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> DedicatedEndpoint:
+        """Creates a new dedicated endpoint for serving models.
+        The endpoint will
+        automatically start after creation. You can deploy any supported model on
+        hardware configurations that meet the model's requirements.
         Args:
-            type (str, optional): Filter endpoints by type ("dedicated" or "serverless"). Defaults to None.
+          autoscaling: Configuration for automatic scaling of the endpoint
-        Returns:
-            List[ListEndpoint]: List of endpoint objects
-        """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
+          hardware: The hardware configuration to use for this endpoint
-        params = {}
-        if type is not None:
-            params["type"] = type
+          model: The model to deploy on this endpoint
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="GET",
-                url="endpoints",
-                params=params,
-            ),
-            stream=False,
-        )
+          availability_zone: Create the endpoint in a specified availability zone (e.g., us-central-4b)
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, list)
+          disable_prompt_cache: Whether to disable the prompt cache for this endpoint
-        return [ListEndpoint(**endpoint) for endpoint in response.data]
+          disable_speculative_decoding: Whether to disable speculative decoding for this endpoint
-    async def create(
+          display_name: A human-readable name for the endpoint
+          inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to null, omit or set to 0 to disable automatic
+              timeout.
+          state: The desired state of the endpoint
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._post(
+            "/endpoints",
+            body=await async_maybe_transform(
+                {
+                    "autoscaling": autoscaling,
+                    "hardware": hardware,
+                    "model": model,
+                    "availability_zone": availability_zone,
+                    "disable_prompt_cache": disable_prompt_cache,
+                    "disable_speculative_decoding": disable_speculative_decoding,
+                    "display_name": display_name,
+                    "inactive_timeout": inactive_timeout,
+                    "state": state,
+                },
+                endpoint_create_params.EndpointCreateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DedicatedEndpoint,
+        )
+    async def retrieve(
         self,
+        endpoint_id: str,
         *,
-        model: str,
-        hardware: str,
-        min_replicas: int,
-        max_replicas: int,
-        display_name: Optional[str] = None,
-        disable_prompt_cache: bool = False,
-        disable_speculative_decoding: bool = False,
-        state: Literal["STARTED", "STOPPED"] = "STARTED",
-        inactive_timeout: Optional[int] = None,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
     ) -> DedicatedEndpoint:
         """
-        Create a new dedicated endpoint.
+        Retrieves details about a specific endpoint, including its current state,
+        configuration, and scaling settings.
         Args:
-            model (str): The model to deploy on this endpoint
-            hardware (str): The hardware configuration to use for this endpoint
-            min_replicas (int): The minimum number of replicas to maintain
-            max_replicas (int): The maximum number of replicas to scale up to
-            display_name (str, optional): A human-readable name for the endpoint
-            disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
-            disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
-            state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
-            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
-        Returns:
-            DedicatedEndpoint: Object containing endpoint information
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
         """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-        data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
-            "model": model,
-            "hardware": hardware,
-            "autoscaling": {
-                "min_replicas": min_replicas,
-                "max_replicas": max_replicas,
-            },
-            "disable_prompt_cache": disable_prompt_cache,
-            "disable_speculative_decoding": disable_speculative_decoding,
-            "state": state,
-        }
-        if display_name is not None:
-            data["display_name"] = display_name
-        if inactive_timeout is not None:
-            data["inactive_timeout"] = inactive_timeout
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="POST",
-                url="endpoints",
-                params=data,
+        if not endpoint_id:
+            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+        return await self._get(
+            f"/endpoints/{endpoint_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            stream=False,
+            cast_to=DedicatedEndpoint,
         )
-        assert isinstance(response, TogetherResponse)
-        return DedicatedEndpoint(**response.data)
+    async def update(
+        self,
+        endpoint_id: str,
+        *,
+        autoscaling: AutoscalingParam | Omit = omit,
+        display_name: str | Omit = omit,
+        inactive_timeout: Optional[int] | Omit = omit,
+        state: Literal["STARTED", "STOPPED"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> DedicatedEndpoint:
+        """Updates an existing endpoint's configuration.
-    async def get(self, endpoint_id: str) -> DedicatedEndpoint:
-        """
-        Get details of a specific endpoint.
+        You can modify the display name,
+        autoscaling settings, or change the endpoint's state (start/stop).
         Args:
-            endpoint_id (str): ID of the endpoint to retrieve
+          autoscaling: New autoscaling configuration for the endpoint
-        Returns:
-            DedicatedEndpoint: Object containing endpoint information
-        """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
+          display_name: A human-readable name for the endpoint
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="GET",
-                url=f"endpoints/{endpoint_id}",
-            ),
-            stream=False,
-        )
+          inactive_timeout: The number of minutes of inactivity after which the endpoint will be
+              automatically stopped. Set to 0 to disable automatic timeout.
-        assert isinstance(response, TogetherResponse)
+          state: The desired state of the endpoint
-        return DedicatedEndpoint(**response.data)
+          extra_headers: Send extra headers
-    async def delete(self, endpoint_id: str) -> None:
-        """
-        Delete a specific endpoint.
+          extra_query: Add additional query parameters to the request
-        Args:
-            endpoint_id (str): ID of the endpoint to delete
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
         """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
+        if not endpoint_id:
+            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+        return await self._patch(
+            f"/endpoints/{endpoint_id}",
+            body=await async_maybe_transform(
+                {
+                    "autoscaling": autoscaling,
+                    "display_name": display_name,
+                    "inactive_timeout": inactive_timeout,
+                    "state": state,
+                },
+                endpoint_update_params.EndpointUpdateParams,
+            ),
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=DedicatedEndpoint,
         )
-        await requestor.arequest(
-            options=TogetherRequest(
-                method="DELETE",
-                url=f"endpoints/{endpoint_id}",
+    async def list(
+        self,
+        *,
+        mine: bool | Omit = omit,
+        type: Literal["dedicated", "serverless"] | Omit = omit,
+        usage_type: Literal["on-demand", "reserved"] | Omit = omit,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> EndpointListResponse:
+        """Returns a list of all endpoints associated with your account.
+        You can filter the
+        results by type (dedicated or serverless).
+        Args:
+          mine: If true, return only endpoints owned by the caller
+          type: Filter endpoints by type
+          usage_type: Filter endpoints by usage type
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
+        """
+        return await self._get(
+            "/endpoints",
+            options=make_request_options(
+                extra_headers=extra_headers,
+                extra_query=extra_query,
+                extra_body=extra_body,
+                timeout=timeout,
+                query=await async_maybe_transform(
+                    {
+                        "mine": mine,
+                        "type": type,
+                        "usage_type": usage_type,
+                    },
+                    endpoint_list_params.EndpointListParams,
+                ),
             ),
-            stream=False,
+            cast_to=EndpointListResponse,
         )
-    async def update(
+    async def delete(
         self,
         endpoint_id: str,
         *,
-        min_replicas: Optional[int] = None,
-        max_replicas: Optional[int] = None,
-        state: Optional[Literal["STARTED", "STOPPED"]] = None,
-        display_name: Optional[str] = None,
-        inactive_timeout: Optional[int] = None,
-    ) -> DedicatedEndpoint:
-        """
-        Update an endpoint's configuration.
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> None:
+        """Permanently deletes an endpoint.
+        This action cannot be undone.
         Args:
-            endpoint_id (str): ID of the endpoint to update
-            min_replicas (int, optional): The minimum number of replicas to maintain
-            max_replicas (int, optional): The maximum number of replicas to scale up to
-            state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
-            display_name (str, optional): A human-readable name for the endpoint
-            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
-        Returns:
-            DedicatedEndpoint: Object containing endpoint information
+          extra_headers: Send extra headers
+          extra_query: Add additional query parameters to the request
+          extra_body: Add additional JSON properties to the request
+          timeout: Override the client-level default timeout for this request, in seconds
         """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-        data: Dict[str, Union[str, Dict[str, int], int]] = {}
-        if min_replicas is not None or max_replicas is not None:
-            current_min = min_replicas
-            current_max = max_replicas
-            if current_min is None or current_max is None:
-                # Get current values if only one is specified
-                current = await self.get(endpoint_id=endpoint_id)
-                current_min = current_min or current.autoscaling.min_replicas
-                current_max = current_max or current.autoscaling.max_replicas
-            data["autoscaling"] = {
-                "min_replicas": current_min,
-                "max_replicas": current_max,
-            }
-        if state is not None:
-            data["state"] = state
-        if display_name is not None:
-            data["display_name"] = display_name
-        if inactive_timeout is not None:
-            data["inactive_timeout"] = inactive_timeout
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="PATCH",
-                url=f"endpoints/{endpoint_id}",
-                params=data,
+        if not endpoint_id:
+            raise ValueError(f"Expected a non-empty value for `endpoint_id` but received {endpoint_id!r}")
+        extra_headers = {"Accept": "*/*", **(extra_headers or {})}
+        return await self._delete(
+            f"/endpoints/{endpoint_id}",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
             ),
-            stream=False,
+            cast_to=NoneType,
         )
-        assert isinstance(response, TogetherResponse)
+    async def list_avzones(
+        self,
+        *,
+        # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
+        # The extra values given here take precedence over values defined on the client or passed to this method.
+        extra_headers: Headers | None = None,
+        extra_query: Query | None = None,
+        extra_body: Body | None = None,
+        timeout: float | httpx.Timeout | None | NotGiven = not_given,
+    ) -> EndpointListAvzonesResponse:
+        """List all available availability zones."""
+        return await self._get(
+            "/clusters/availability-zones",
+            options=make_request_options(
+                extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
+            ),
+            cast_to=EndpointListAvzonesResponse,
+        )
-        return DedicatedEndpoint(**response.data)
-    async def list_hardware(
-        self, model: Optional[str] = None
-    ) -> List[HardwareWithStatus]:
-        """
-        List available hardware configurations.
+class EndpointsResourceWithRawResponse:
+    def __init__(self, endpoints: EndpointsResource) -> None:
+        self._endpoints = endpoints
-        Args:
-            model (str, optional): Filter hardware configurations by model compatibility. When provided,
-                                 the response includes availability status for each compatible configuration.
+        self.create = to_raw_response_wrapper(
+            endpoints.create,
+        )
+        self.retrieve = to_raw_response_wrapper(
+            endpoints.retrieve,
+        )
+        self.update = to_raw_response_wrapper(
+            endpoints.update,
+        )
+        self.list = to_raw_response_wrapper(
+            endpoints.list,
+        )
+        self.delete = to_raw_response_wrapper(
+            endpoints.delete,
+        )
+        self.list_avzones = to_raw_response_wrapper(
+            endpoints.list_avzones,
+        )
-        Returns:
-            List[HardwareWithStatus]: List of hardware configurations with their status
-        """
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
+class AsyncEndpointsResourceWithRawResponse:
+    def __init__(self, endpoints: AsyncEndpointsResource) -> None:
+        self._endpoints = endpoints
+        self.create = async_to_raw_response_wrapper(
+            endpoints.create,
+        )
+        self.retrieve = async_to_raw_response_wrapper(
+            endpoints.retrieve,
+        )
+        self.update = async_to_raw_response_wrapper(
+            endpoints.update,
+        )
+        self.list = async_to_raw_response_wrapper(
+            endpoints.list,
+        )
+        self.delete = async_to_raw_response_wrapper(
+            endpoints.delete,
+        )
+        self.list_avzones = async_to_raw_response_wrapper(
+            endpoints.list_avzones,
         )
-        params = {}
-        if model is not None:
-            params["model"] = model
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="GET",
-                url="hardware",
-                params=params,
-            ),
-            stream=False,
+class EndpointsResourceWithStreamingResponse:
+    def __init__(self, endpoints: EndpointsResource) -> None:
+        self._endpoints = endpoints
+        self.create = to_streamed_response_wrapper(
+            endpoints.create,
+        )
+        self.retrieve = to_streamed_response_wrapper(
+            endpoints.retrieve,
         )
+        self.update = to_streamed_response_wrapper(
+            endpoints.update,
+        )
+        self.list = to_streamed_response_wrapper(
+            endpoints.list,
+        )
+        self.delete = to_streamed_response_wrapper(
+            endpoints.delete,
+        )
+        self.list_avzones = to_streamed_response_wrapper(
+            endpoints.list_avzones,
+        )
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, dict)
-        assert isinstance(response.data["data"], list)
+class AsyncEndpointsResourceWithStreamingResponse:
+    def __init__(self, endpoints: AsyncEndpointsResource) -> None:
+        self._endpoints = endpoints
-        return [HardwareWithStatus(**item) for item in response.data["data"]]
+        self.create = async_to_streamed_response_wrapper(
+            endpoints.create,
+        )
+        self.retrieve = async_to_streamed_response_wrapper(
+            endpoints.retrieve,
+        )
+        self.update = async_to_streamed_response_wrapper(
+            endpoints.update,
+        )
+        self.list = async_to_streamed_response_wrapper(
+            endpoints.list,
+        )
+        self.delete = async_to_streamed_response_wrapper(
+            endpoints.delete,
+        )
+        self.list_avzones = async_to_streamed_response_wrapper(
+            endpoints.list_avzones,
+        )

together 1.5.17__py3-none-any.whl → 2.0.0a8__py3-none-any.whl

together 1.5.17py3-none-any.whl → 2.0.0a8py3-none-any.whl