together 1.5.35__py3-none-any.whl → 2.0.0a6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (208)
  1. together/__init__.py +101 -114
  2. together/_base_client.py +1995 -0
  3. together/_client.py +1033 -0
  4. together/_compat.py +219 -0
  5. together/_constants.py +14 -0
  6. together/_exceptions.py +108 -0
  7. together/_files.py +123 -0
  8. together/_models.py +857 -0
  9. together/_qs.py +150 -0
  10. together/_resource.py +43 -0
  11. together/_response.py +830 -0
  12. together/_streaming.py +370 -0
  13. together/_types.py +260 -0
  14. together/_utils/__init__.py +64 -0
  15. together/_utils/_compat.py +45 -0
  16. together/_utils/_datetime_parse.py +136 -0
  17. together/_utils/_logs.py +25 -0
  18. together/_utils/_proxy.py +65 -0
  19. together/_utils/_reflection.py +42 -0
  20. together/_utils/_resources_proxy.py +24 -0
  21. together/_utils/_streams.py +12 -0
  22. together/_utils/_sync.py +58 -0
  23. together/_utils/_transform.py +457 -0
  24. together/_utils/_typing.py +156 -0
  25. together/_utils/_utils.py +421 -0
  26. together/_version.py +4 -0
  27. together/lib/.keep +4 -0
  28. together/lib/__init__.py +23 -0
  29. together/{cli → lib/cli}/api/endpoints.py +66 -84
  30. together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
  31. together/{cli → lib/cli}/api/files.py +20 -17
  32. together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
  33. together/{cli → lib/cli}/api/models.py +34 -27
  34. together/lib/cli/api/utils.py +50 -0
  35. together/{cli → lib/cli}/cli.py +16 -26
  36. together/{constants.py → lib/constants.py} +11 -24
  37. together/lib/resources/__init__.py +11 -0
  38. together/lib/resources/files.py +999 -0
  39. together/lib/resources/fine_tuning.py +280 -0
  40. together/lib/resources/models.py +35 -0
  41. together/lib/types/__init__.py +13 -0
  42. together/lib/types/error.py +9 -0
  43. together/lib/types/fine_tuning.py +397 -0
  44. together/{utils → lib/utils}/__init__.py +6 -14
  45. together/{utils → lib/utils}/_log.py +11 -16
  46. together/{utils → lib/utils}/files.py +90 -288
  47. together/lib/utils/serializer.py +10 -0
  48. together/{utils → lib/utils}/tools.py +19 -55
  49. together/resources/__init__.py +225 -39
  50. together/resources/audio/__init__.py +72 -48
  51. together/resources/audio/audio.py +198 -0
  52. together/resources/audio/speech.py +574 -128
  53. together/resources/audio/transcriptions.py +247 -261
  54. together/resources/audio/translations.py +221 -241
  55. together/resources/audio/voices.py +111 -41
  56. together/resources/batches.py +417 -0
  57. together/resources/chat/__init__.py +30 -21
  58. together/resources/chat/chat.py +102 -0
  59. together/resources/chat/completions.py +1063 -263
  60. together/resources/code_interpreter/__init__.py +33 -0
  61. together/resources/code_interpreter/code_interpreter.py +258 -0
  62. together/resources/code_interpreter/sessions.py +135 -0
  63. together/resources/completions.py +884 -225
  64. together/resources/embeddings.py +172 -68
  65. together/resources/endpoints.py +589 -490
  66. together/resources/evals.py +452 -0
  67. together/resources/files.py +397 -129
  68. together/resources/fine_tuning.py +1033 -0
  69. together/resources/hardware.py +181 -0
  70. together/resources/images.py +258 -104
  71. together/resources/jobs.py +214 -0
  72. together/resources/models.py +223 -193
  73. together/resources/rerank.py +190 -92
  74. together/resources/videos.py +286 -214
  75. together/types/__init__.py +66 -167
  76. together/types/audio/__init__.py +10 -0
  77. together/types/audio/speech_create_params.py +75 -0
  78. together/types/audio/transcription_create_params.py +54 -0
  79. together/types/audio/transcription_create_response.py +111 -0
  80. together/types/audio/translation_create_params.py +40 -0
  81. together/types/audio/translation_create_response.py +70 -0
  82. together/types/audio/voice_list_response.py +23 -0
  83. together/types/audio_speech_stream_chunk.py +16 -0
  84. together/types/autoscaling.py +13 -0
  85. together/types/autoscaling_param.py +15 -0
  86. together/types/batch_create_params.py +24 -0
  87. together/types/batch_create_response.py +14 -0
  88. together/types/batch_job.py +45 -0
  89. together/types/batch_list_response.py +10 -0
  90. together/types/chat/__init__.py +18 -0
  91. together/types/chat/chat_completion.py +60 -0
  92. together/types/chat/chat_completion_chunk.py +61 -0
  93. together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
  94. together/types/chat/chat_completion_structured_message_text_param.py +13 -0
  95. together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
  96. together/types/chat/chat_completion_usage.py +13 -0
  97. together/types/chat/chat_completion_warning.py +9 -0
  98. together/types/chat/completion_create_params.py +329 -0
  99. together/types/code_interpreter/__init__.py +5 -0
  100. together/types/code_interpreter/session_list_response.py +31 -0
  101. together/types/code_interpreter_execute_params.py +45 -0
  102. together/types/completion.py +42 -0
  103. together/types/completion_chunk.py +66 -0
  104. together/types/completion_create_params.py +138 -0
  105. together/types/dedicated_endpoint.py +44 -0
  106. together/types/embedding.py +24 -0
  107. together/types/embedding_create_params.py +31 -0
  108. together/types/endpoint_create_params.py +43 -0
  109. together/types/endpoint_list_avzones_response.py +11 -0
  110. together/types/endpoint_list_params.py +18 -0
  111. together/types/endpoint_list_response.py +41 -0
  112. together/types/endpoint_update_params.py +27 -0
  113. together/types/eval_create_params.py +263 -0
  114. together/types/eval_create_response.py +16 -0
  115. together/types/eval_list_params.py +21 -0
  116. together/types/eval_list_response.py +10 -0
  117. together/types/eval_status_response.py +100 -0
  118. together/types/evaluation_job.py +139 -0
  119. together/types/execute_response.py +108 -0
  120. together/types/file_delete_response.py +13 -0
  121. together/types/file_list.py +12 -0
  122. together/types/file_purpose.py +9 -0
  123. together/types/file_response.py +31 -0
  124. together/types/file_type.py +7 -0
  125. together/types/fine_tuning_cancel_response.py +194 -0
  126. together/types/fine_tuning_content_params.py +24 -0
  127. together/types/fine_tuning_delete_params.py +11 -0
  128. together/types/fine_tuning_delete_response.py +12 -0
  129. together/types/fine_tuning_list_checkpoints_response.py +21 -0
  130. together/types/fine_tuning_list_events_response.py +12 -0
  131. together/types/fine_tuning_list_response.py +199 -0
  132. together/types/finetune_event.py +41 -0
  133. together/types/finetune_event_type.py +33 -0
  134. together/types/finetune_response.py +177 -0
  135. together/types/hardware_list_params.py +16 -0
  136. together/types/hardware_list_response.py +58 -0
  137. together/types/image_data_b64.py +15 -0
  138. together/types/image_data_url.py +15 -0
  139. together/types/image_file.py +23 -0
  140. together/types/image_generate_params.py +85 -0
  141. together/types/job_list_response.py +47 -0
  142. together/types/job_retrieve_response.py +43 -0
  143. together/types/log_probs.py +18 -0
  144. together/types/model_list_response.py +10 -0
  145. together/types/model_object.py +42 -0
  146. together/types/model_upload_params.py +36 -0
  147. together/types/model_upload_response.py +23 -0
  148. together/types/rerank_create_params.py +36 -0
  149. together/types/rerank_create_response.py +36 -0
  150. together/types/tool_choice.py +23 -0
  151. together/types/tool_choice_param.py +23 -0
  152. together/types/tools_param.py +23 -0
  153. together/types/training_method_dpo.py +22 -0
  154. together/types/training_method_sft.py +18 -0
  155. together/types/video_create_params.py +86 -0
  156. together/types/video_create_response.py +10 -0
  157. together/types/video_job.py +57 -0
  158. together-2.0.0a6.dist-info/METADATA +729 -0
  159. together-2.0.0a6.dist-info/RECORD +165 -0
  160. {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
  161. together-2.0.0a6.dist-info/entry_points.txt +2 -0
  162. {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
  163. together/abstract/api_requestor.py +0 -770
  164. together/cli/api/chat.py +0 -298
  165. together/cli/api/completions.py +0 -119
  166. together/cli/api/images.py +0 -93
  167. together/cli/api/utils.py +0 -139
  168. together/client.py +0 -186
  169. together/error.py +0 -194
  170. together/filemanager.py +0 -635
  171. together/legacy/__init__.py +0 -0
  172. together/legacy/base.py +0 -27
  173. together/legacy/complete.py +0 -93
  174. together/legacy/embeddings.py +0 -27
  175. together/legacy/files.py +0 -146
  176. together/legacy/finetune.py +0 -177
  177. together/legacy/images.py +0 -27
  178. together/legacy/models.py +0 -44
  179. together/resources/batch.py +0 -165
  180. together/resources/code_interpreter.py +0 -82
  181. together/resources/evaluation.py +0 -808
  182. together/resources/finetune.py +0 -1388
  183. together/together_response.py +0 -50
  184. together/types/abstract.py +0 -26
  185. together/types/audio_speech.py +0 -311
  186. together/types/batch.py +0 -54
  187. together/types/chat_completions.py +0 -210
  188. together/types/code_interpreter.py +0 -57
  189. together/types/common.py +0 -67
  190. together/types/completions.py +0 -107
  191. together/types/embeddings.py +0 -35
  192. together/types/endpoints.py +0 -123
  193. together/types/error.py +0 -16
  194. together/types/evaluation.py +0 -93
  195. together/types/files.py +0 -93
  196. together/types/finetune.py +0 -465
  197. together/types/images.py +0 -42
  198. together/types/models.py +0 -96
  199. together/types/rerank.py +0 -43
  200. together/types/videos.py +0 -69
  201. together/utils/api_helpers.py +0 -124
  202. together/version.py +0 -6
  203. together-1.5.35.dist-info/METADATA +0 -583
  204. together-1.5.35.dist-info/RECORD +0 -77
  205. together-1.5.35.dist-info/entry_points.txt +0 -3
  206. /together/{abstract → lib/cli}/__init__.py +0 -0
  207. /together/{cli → lib/cli/api}/__init__.py +0 -0
  208. /together/{cli/api/__init__.py → py.typed} +0 -0
@@ -0,0 +1,45 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Iterable
6
+ from typing_extensions import Literal, Required, TypedDict
7
+
8
+ __all__ = ["CodeInterpreterExecuteParams", "File"]
9
+
10
+
11
+ class CodeInterpreterExecuteParams(TypedDict, total=False):
12
+ code: Required[str]
13
+ """Code snippet to execute."""
14
+
15
+ language: Required[Literal["python"]]
16
+ """Programming language for the code to execute.
17
+
18
+ Currently only supports Python, but more will be added.
19
+ """
20
+
21
+ files: Iterable[File]
22
+ """Files to upload to the session.
23
+
24
+ If present, files will be uploaded before executing the given code.
25
+ """
26
+
27
+ session_id: str
28
+ """Identifier of the current session.
29
+
30
+ Used to make follow-up calls. Requests will return an error if the session does
31
+ not belong to the caller or has expired.
32
+ """
33
+
34
+
35
+ class File(TypedDict, total=False):
36
+ content: Required[str]
37
+
38
+ encoding: Required[Literal["string", "base64"]]
39
+ """Encoding of the file content.
40
+
41
+ Use `string` for text files such as code, and `base64` for binary files, such as
42
+ images.
43
+ """
44
+
45
+ name: Required[str]
@@ -0,0 +1,42 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+ from .log_probs import LogProbs
8
+ from .chat.chat_completion_usage import ChatCompletionUsage
9
+
10
+ __all__ = ["Completion", "Choice", "Prompt"]
11
+
12
+
13
+ class Choice(BaseModel):
14
+ finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None
15
+
16
+ logprobs: Optional[LogProbs] = None
17
+
18
+ seed: Optional[int] = None
19
+
20
+ text: Optional[str] = None
21
+
22
+
23
+ class Prompt(BaseModel):
24
+ logprobs: Optional[LogProbs] = None
25
+
26
+ text: Optional[str] = None
27
+
28
+
29
+ class Completion(BaseModel):
30
+ id: str
31
+
32
+ choices: List[Choice]
33
+
34
+ created: int
35
+
36
+ model: str
37
+
38
+ object: Literal["text.completion"]
39
+
40
+ usage: Optional[ChatCompletionUsage] = None
41
+
42
+ prompt: Optional[List[Prompt]] = None
@@ -0,0 +1,66 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List, Optional
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+ from .tool_choice import ToolChoice
8
+ from .chat.chat_completion_usage import ChatCompletionUsage
9
+
10
+ __all__ = ["CompletionChunk", "Token", "Choice", "ChoiceDelta", "ChoiceDeltaFunctionCall"]
11
+
12
+
13
+ class Token(BaseModel):
14
+ id: int
15
+
16
+ logprob: float
17
+
18
+ special: bool
19
+
20
+ text: str
21
+
22
+
23
+ class ChoiceDeltaFunctionCall(BaseModel):
24
+ arguments: str
25
+
26
+ name: str
27
+
28
+
29
+ class ChoiceDelta(BaseModel):
30
+ role: Literal["system", "user", "assistant", "function", "tool"]
31
+
32
+ content: Optional[str] = None
33
+
34
+ function_call: Optional[ChoiceDeltaFunctionCall] = None
35
+
36
+ reasoning: Optional[str] = None
37
+
38
+ token_id: Optional[int] = None
39
+
40
+ tool_calls: Optional[List[ToolChoice]] = None
41
+
42
+
43
+ class Choice(BaseModel):
44
+ index: int
45
+
46
+ delta: Optional[ChoiceDelta] = None
47
+
48
+ text: Optional[str] = None
49
+
50
+
51
+ class CompletionChunk(BaseModel):
52
+ id: str
53
+
54
+ token: Token
55
+
56
+ choices: List[Choice]
57
+
58
+ finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None
59
+
60
+ usage: Optional[ChatCompletionUsage] = None
61
+
62
+ created: Optional[int] = None
63
+
64
+ object: Optional[Literal["completion.chunk"]] = None
65
+
66
+ seed: Optional[int] = None
@@ -0,0 +1,138 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Dict, Union
6
+ from typing_extensions import Literal, Required, TypedDict
7
+
8
+ from .._types import SequenceNotStr
9
+
10
+ __all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"]
11
+
12
+
13
+ class CompletionCreateParamsBase(TypedDict, total=False):
14
+ model: Required[
15
+ Union[
16
+ Literal[
17
+ "meta-llama/Llama-2-70b-hf",
18
+ "mistralai/Mistral-7B-v0.1",
19
+ "mistralai/Mixtral-8x7B-v0.1",
20
+ "Meta-Llama/Llama-Guard-7b",
21
+ ],
22
+ str,
23
+ ]
24
+ ]
25
+ """The name of the model to query.
26
+
27
+ [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models)
28
+ """
29
+
30
+ prompt: Required[str]
31
+ """A string providing context for the model to complete."""
32
+
33
+ echo: bool
34
+ """If true, the response will contain the prompt.
35
+
36
+ Can be used with `logprobs` to return prompt logprobs.
37
+ """
38
+
39
+ frequency_penalty: float
40
+ """
41
+ A number between -2.0 and 2.0 where a positive value decreases the likelihood of
42
+ repeating tokens that have already been mentioned.
43
+ """
44
+
45
+ logit_bias: Dict[str, float]
46
+ """Adjusts the likelihood of specific tokens appearing in the generated output."""
47
+
48
+ logprobs: int
49
+ """
50
+ An integer between 0 and 20 of the top k tokens to return log probabilities for
51
+ at each generation step, instead of just the sampled token. Log probabilities
52
+ help assess model confidence in token predictions.
53
+ """
54
+
55
+ max_tokens: int
56
+ """The maximum number of tokens to generate."""
57
+
58
+ min_p: float
59
+ """A number between 0 and 1 that can be used as an alternative to top-p and top-k."""
60
+
61
+ n: int
62
+ """The number of completions to generate for each prompt."""
63
+
64
+ presence_penalty: float
65
+ """
66
+ A number between -2.0 and 2.0 where a positive value increases the likelihood of
67
+ a model talking about new topics.
68
+ """
69
+
70
+ repetition_penalty: float
71
+ """
72
+ A number that controls the diversity of generated text by reducing the
73
+ likelihood of repeated sequences. Higher values decrease repetition.
74
+ """
75
+
76
+ safety_model: Union[Literal["Meta-Llama/Llama-Guard-7b"], str]
77
+ """The name of the moderation model used to validate tokens.
78
+
79
+ Choose from the available moderation models found
80
+ [here](https://docs.together.ai/docs/inference-models#moderation-models).
81
+ """
82
+
83
+ seed: int
84
+ """Seed value for reproducibility."""
85
+
86
+ stop: SequenceNotStr[str]
87
+ """A list of string sequences that will truncate (stop) inference text output.
88
+
89
+ For example, "</s>" will stop generation as soon as the model generates the
90
+ given token.
91
+ """
92
+
93
+ temperature: float
94
+ """
95
+ A decimal number from 0-1 that determines the degree of randomness in the
96
+ response. A temperature less than 1 favors more correctness and is appropriate
97
+ for question answering or summarization. A value closer to 1 introduces more
98
+ randomness in the output.
99
+ """
100
+
101
+ top_k: int
102
+ """
103
+ An integer that's used to limit the number of choices for the next predicted
104
+ word or token. It specifies the maximum number of tokens to consider at each
105
+ step, based on their probability of occurrence. This technique helps to speed up
106
+ the generation process and can improve the quality of the generated text by
107
+ focusing on the most likely options.
108
+ """
109
+
110
+ top_p: float
111
+ """
112
+ A percentage (also called the nucleus parameter) that's used to dynamically
113
+ adjust the number of choices for each predicted token based on the cumulative
114
+ probabilities. It specifies a probability threshold below which all less likely
115
+ tokens are filtered out. This technique helps maintain diversity and generate
116
+ more fluent and natural-sounding text.
117
+ """
118
+
119
+
120
+ class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
121
+ stream: Literal[False]
122
+ """
123
+ If true, stream tokens as Server-Sent Events as the model generates them instead
124
+ of waiting for the full model response. The stream terminates with
125
+ `data: [DONE]`. If false, return a single JSON object containing the results.
126
+ """
127
+
128
+
129
+ class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
130
+ stream: Required[Literal[True]]
131
+ """
132
+ If true, stream tokens as Server-Sent Events as the model generates them instead
133
+ of waiting for the full model response. The stream terminates with
134
+ `data: [DONE]`. If false, return a single JSON object containing the results.
135
+ """
136
+
137
+
138
+ CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]
@@ -0,0 +1,44 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from datetime import datetime
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+ from .autoscaling import Autoscaling
8
+
9
+ __all__ = ["DedicatedEndpoint"]
10
+
11
+
12
+ class DedicatedEndpoint(BaseModel):
13
+ id: str
14
+ """Unique identifier for the endpoint"""
15
+
16
+ autoscaling: Autoscaling
17
+ """Configuration for automatic scaling of the endpoint"""
18
+
19
+ created_at: datetime
20
+ """Timestamp when the endpoint was created"""
21
+
22
+ display_name: str
23
+ """Human-readable name for the endpoint"""
24
+
25
+ hardware: str
26
+ """The hardware configuration used for this endpoint"""
27
+
28
+ model: str
29
+ """The model deployed on this endpoint"""
30
+
31
+ name: str
32
+ """System name for the endpoint"""
33
+
34
+ object: Literal["endpoint"]
35
+ """The type of object"""
36
+
37
+ owner: str
38
+ """The owner of this endpoint"""
39
+
40
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
41
+ """Current state of the endpoint"""
42
+
43
+ type: Literal["dedicated"]
44
+ """The type of endpoint"""
@@ -0,0 +1,24 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List
4
+ from typing_extensions import Literal
5
+
6
+ from .._models import BaseModel
7
+
8
+ __all__ = ["Embedding", "Data"]
9
+
10
+
11
+ class Data(BaseModel):
12
+ embedding: List[float]
13
+
14
+ index: int
15
+
16
+ object: Literal["embedding"]
17
+
18
+
19
+ class Embedding(BaseModel):
20
+ data: List[Data]
21
+
22
+ model: str
23
+
24
+ object: Literal["list"]
@@ -0,0 +1,31 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Union
6
+ from typing_extensions import Literal, Required, TypedDict
7
+
8
+ from .._types import SequenceNotStr
9
+
10
+ __all__ = ["EmbeddingCreateParams"]
11
+
12
+
13
+ class EmbeddingCreateParams(TypedDict, total=False):
14
+ input: Required[Union[str, SequenceNotStr[str]]]
15
+ """A string providing the text for the model to embed."""
16
+
17
+ model: Required[
18
+ Union[
19
+ Literal[
20
+ "WhereIsAI/UAE-Large-V1",
21
+ "BAAI/bge-large-en-v1.5",
22
+ "BAAI/bge-base-en-v1.5",
23
+ "togethercomputer/m2-bert-80M-8k-retrieval",
24
+ ],
25
+ str,
26
+ ]
27
+ ]
28
+ """The name of the embedding model to use.
29
+
30
+ [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
31
+ """
@@ -0,0 +1,43 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+ from typing_extensions import Literal, Required, TypedDict
7
+
8
+ from .autoscaling_param import AutoscalingParam
9
+
10
+ __all__ = ["EndpointCreateParams"]
11
+
12
+
13
+ class EndpointCreateParams(TypedDict, total=False):
14
+ autoscaling: Required[AutoscalingParam]
15
+ """Configuration for automatic scaling of the endpoint"""
16
+
17
+ hardware: Required[str]
18
+ """The hardware configuration to use for this endpoint"""
19
+
20
+ model: Required[str]
21
+ """The model to deploy on this endpoint"""
22
+
23
+ availability_zone: str
24
+ """Create the endpoint in a specified availability zone (e.g., us-central-4b)"""
25
+
26
+ disable_prompt_cache: bool
27
+ """Whether to disable the prompt cache for this endpoint"""
28
+
29
+ disable_speculative_decoding: bool
30
+ """Whether to disable speculative decoding for this endpoint"""
31
+
32
+ display_name: str
33
+ """A human-readable name for the endpoint"""
34
+
35
+ inactive_timeout: Optional[int]
36
+ """
37
+ The number of minutes of inactivity after which the endpoint will be
38
+ automatically stopped. Set to null, omit or set to 0 to disable automatic
39
+ timeout.
40
+ """
41
+
42
+ state: Literal["STARTED", "STOPPED"]
43
+ """The desired state of the endpoint"""
@@ -0,0 +1,11 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List
4
+
5
+ from .._models import BaseModel
6
+
7
+ __all__ = ["EndpointListAvzonesResponse"]
8
+
9
+
10
+ class EndpointListAvzonesResponse(BaseModel):
11
+ avzones: List[str]
@@ -0,0 +1,18 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing_extensions import Literal, TypedDict
6
+
7
+ __all__ = ["EndpointListParams"]
8
+
9
+
10
+ class EndpointListParams(TypedDict, total=False):
11
+ mine: bool
12
+ """If true, return only endpoints owned by the caller"""
13
+
14
+ type: Literal["dedicated", "serverless"]
15
+ """Filter endpoints by type"""
16
+
17
+ usage_type: Literal["on-demand", "reserved"]
18
+ """Filter endpoints by usage type"""
@@ -0,0 +1,41 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List
4
+ from datetime import datetime
5
+ from typing_extensions import Literal
6
+
7
+ from .._models import BaseModel
8
+
9
+ __all__ = ["EndpointListResponse", "Data"]
10
+
11
+
12
+ class Data(BaseModel):
13
+ id: str
14
+ """Unique identifier for the endpoint"""
15
+
16
+ created_at: datetime
17
+ """Timestamp when the endpoint was created"""
18
+
19
+ model: str
20
+ """The model deployed on this endpoint"""
21
+
22
+ name: str
23
+ """System name for the endpoint"""
24
+
25
+ object: Literal["endpoint"]
26
+ """The type of object"""
27
+
28
+ owner: str
29
+ """The owner of this endpoint"""
30
+
31
+ state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
32
+ """Current state of the endpoint"""
33
+
34
+ type: Literal["serverless", "dedicated"]
35
+ """The type of endpoint"""
36
+
37
+
38
+ class EndpointListResponse(BaseModel):
39
+ data: List[Data]
40
+
41
+ object: Literal["list"]
@@ -0,0 +1,27 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+ from typing_extensions import Literal, TypedDict
7
+
8
+ from .autoscaling_param import AutoscalingParam
9
+
10
+ __all__ = ["EndpointUpdateParams"]
11
+
12
+
13
+ class EndpointUpdateParams(TypedDict, total=False):
14
+ autoscaling: AutoscalingParam
15
+ """New autoscaling configuration for the endpoint"""
16
+
17
+ display_name: str
18
+ """A human-readable name for the endpoint"""
19
+
20
+ inactive_timeout: Optional[int]
21
+ """
22
+ The number of minutes of inactivity after which the endpoint will be
23
+ automatically stopped. Set to 0 to disable automatic timeout.
24
+ """
25
+
26
+ state: Literal["STARTED", "STOPPED"]
27
+ """The desired state of the endpoint"""