together 1.5.34__py3-none-any.whl → 2.0.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/__init__.py +101 -114
- together/_base_client.py +1995 -0
- together/_client.py +1033 -0
- together/_compat.py +219 -0
- together/_constants.py +14 -0
- together/_exceptions.py +108 -0
- together/_files.py +123 -0
- together/_models.py +857 -0
- together/_qs.py +150 -0
- together/_resource.py +43 -0
- together/_response.py +830 -0
- together/_streaming.py +370 -0
- together/_types.py +260 -0
- together/_utils/__init__.py +64 -0
- together/_utils/_compat.py +45 -0
- together/_utils/_datetime_parse.py +136 -0
- together/_utils/_logs.py +25 -0
- together/_utils/_proxy.py +65 -0
- together/_utils/_reflection.py +42 -0
- together/_utils/_resources_proxy.py +24 -0
- together/_utils/_streams.py +12 -0
- together/_utils/_sync.py +58 -0
- together/_utils/_transform.py +457 -0
- together/_utils/_typing.py +156 -0
- together/_utils/_utils.py +421 -0
- together/_version.py +4 -0
- together/lib/.keep +4 -0
- together/lib/__init__.py +23 -0
- together/{cli → lib/cli}/api/endpoints.py +65 -81
- together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
- together/{cli → lib/cli}/api/files.py +20 -17
- together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
- together/{cli → lib/cli}/api/models.py +34 -27
- together/lib/cli/api/utils.py +50 -0
- together/{cli → lib/cli}/cli.py +16 -26
- together/{constants.py → lib/constants.py} +11 -24
- together/lib/resources/__init__.py +11 -0
- together/lib/resources/files.py +999 -0
- together/lib/resources/fine_tuning.py +280 -0
- together/lib/resources/models.py +35 -0
- together/lib/types/__init__.py +13 -0
- together/lib/types/error.py +9 -0
- together/lib/types/fine_tuning.py +397 -0
- together/{utils → lib/utils}/__init__.py +6 -14
- together/{utils → lib/utils}/_log.py +11 -16
- together/{utils → lib/utils}/files.py +90 -288
- together/lib/utils/serializer.py +10 -0
- together/{utils → lib/utils}/tools.py +19 -55
- together/resources/__init__.py +225 -39
- together/resources/audio/__init__.py +72 -48
- together/resources/audio/audio.py +198 -0
- together/resources/audio/speech.py +574 -128
- together/resources/audio/transcriptions.py +247 -261
- together/resources/audio/translations.py +221 -241
- together/resources/audio/voices.py +111 -41
- together/resources/batches.py +417 -0
- together/resources/chat/__init__.py +30 -21
- together/resources/chat/chat.py +102 -0
- together/resources/chat/completions.py +1063 -263
- together/resources/code_interpreter/__init__.py +33 -0
- together/resources/code_interpreter/code_interpreter.py +258 -0
- together/resources/code_interpreter/sessions.py +135 -0
- together/resources/completions.py +884 -225
- together/resources/embeddings.py +172 -68
- together/resources/endpoints.py +589 -477
- together/resources/evals.py +452 -0
- together/resources/files.py +397 -129
- together/resources/fine_tuning.py +1033 -0
- together/resources/hardware.py +181 -0
- together/resources/images.py +258 -104
- together/resources/jobs.py +214 -0
- together/resources/models.py +223 -193
- together/resources/rerank.py +190 -92
- together/resources/videos.py +286 -214
- together/types/__init__.py +66 -167
- together/types/audio/__init__.py +10 -0
- together/types/audio/speech_create_params.py +75 -0
- together/types/audio/transcription_create_params.py +54 -0
- together/types/audio/transcription_create_response.py +111 -0
- together/types/audio/translation_create_params.py +40 -0
- together/types/audio/translation_create_response.py +70 -0
- together/types/audio/voice_list_response.py +23 -0
- together/types/audio_speech_stream_chunk.py +16 -0
- together/types/autoscaling.py +13 -0
- together/types/autoscaling_param.py +15 -0
- together/types/batch_create_params.py +24 -0
- together/types/batch_create_response.py +14 -0
- together/types/batch_job.py +45 -0
- together/types/batch_list_response.py +10 -0
- together/types/chat/__init__.py +18 -0
- together/types/chat/chat_completion.py +60 -0
- together/types/chat/chat_completion_chunk.py +61 -0
- together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
- together/types/chat/chat_completion_structured_message_text_param.py +13 -0
- together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
- together/types/chat/chat_completion_usage.py +13 -0
- together/types/chat/chat_completion_warning.py +9 -0
- together/types/chat/completion_create_params.py +329 -0
- together/types/code_interpreter/__init__.py +5 -0
- together/types/code_interpreter/session_list_response.py +31 -0
- together/types/code_interpreter_execute_params.py +45 -0
- together/types/completion.py +42 -0
- together/types/completion_chunk.py +66 -0
- together/types/completion_create_params.py +138 -0
- together/types/dedicated_endpoint.py +44 -0
- together/types/embedding.py +24 -0
- together/types/embedding_create_params.py +31 -0
- together/types/endpoint_create_params.py +43 -0
- together/types/endpoint_list_avzones_response.py +11 -0
- together/types/endpoint_list_params.py +18 -0
- together/types/endpoint_list_response.py +41 -0
- together/types/endpoint_update_params.py +27 -0
- together/types/eval_create_params.py +263 -0
- together/types/eval_create_response.py +16 -0
- together/types/eval_list_params.py +21 -0
- together/types/eval_list_response.py +10 -0
- together/types/eval_status_response.py +100 -0
- together/types/evaluation_job.py +139 -0
- together/types/execute_response.py +108 -0
- together/types/file_delete_response.py +13 -0
- together/types/file_list.py +12 -0
- together/types/file_purpose.py +9 -0
- together/types/file_response.py +31 -0
- together/types/file_type.py +7 -0
- together/types/fine_tuning_cancel_response.py +194 -0
- together/types/fine_tuning_content_params.py +24 -0
- together/types/fine_tuning_delete_params.py +11 -0
- together/types/fine_tuning_delete_response.py +12 -0
- together/types/fine_tuning_list_checkpoints_response.py +21 -0
- together/types/fine_tuning_list_events_response.py +12 -0
- together/types/fine_tuning_list_response.py +199 -0
- together/types/finetune_event.py +41 -0
- together/types/finetune_event_type.py +33 -0
- together/types/finetune_response.py +177 -0
- together/types/hardware_list_params.py +16 -0
- together/types/hardware_list_response.py +58 -0
- together/types/image_data_b64.py +15 -0
- together/types/image_data_url.py +15 -0
- together/types/image_file.py +23 -0
- together/types/image_generate_params.py +85 -0
- together/types/job_list_response.py +47 -0
- together/types/job_retrieve_response.py +43 -0
- together/types/log_probs.py +18 -0
- together/types/model_list_response.py +10 -0
- together/types/model_object.py +42 -0
- together/types/model_upload_params.py +36 -0
- together/types/model_upload_response.py +23 -0
- together/types/rerank_create_params.py +36 -0
- together/types/rerank_create_response.py +36 -0
- together/types/tool_choice.py +23 -0
- together/types/tool_choice_param.py +23 -0
- together/types/tools_param.py +23 -0
- together/types/training_method_dpo.py +22 -0
- together/types/training_method_sft.py +18 -0
- together/types/video_create_params.py +86 -0
- together/types/video_create_response.py +10 -0
- together/types/video_job.py +57 -0
- together-2.0.0a6.dist-info/METADATA +729 -0
- together-2.0.0a6.dist-info/RECORD +165 -0
- {together-1.5.34.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
- together-2.0.0a6.dist-info/entry_points.txt +2 -0
- {together-1.5.34.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
- together/abstract/api_requestor.py +0 -770
- together/cli/api/chat.py +0 -298
- together/cli/api/completions.py +0 -119
- together/cli/api/images.py +0 -93
- together/cli/api/utils.py +0 -139
- together/client.py +0 -186
- together/error.py +0 -194
- together/filemanager.py +0 -635
- together/legacy/__init__.py +0 -0
- together/legacy/base.py +0 -27
- together/legacy/complete.py +0 -93
- together/legacy/embeddings.py +0 -27
- together/legacy/files.py +0 -146
- together/legacy/finetune.py +0 -177
- together/legacy/images.py +0 -27
- together/legacy/models.py +0 -44
- together/resources/batch.py +0 -165
- together/resources/code_interpreter.py +0 -82
- together/resources/evaluation.py +0 -808
- together/resources/finetune.py +0 -1388
- together/together_response.py +0 -50
- together/types/abstract.py +0 -26
- together/types/audio_speech.py +0 -311
- together/types/batch.py +0 -54
- together/types/chat_completions.py +0 -210
- together/types/code_interpreter.py +0 -57
- together/types/common.py +0 -67
- together/types/completions.py +0 -107
- together/types/embeddings.py +0 -35
- together/types/endpoints.py +0 -123
- together/types/error.py +0 -16
- together/types/evaluation.py +0 -93
- together/types/files.py +0 -93
- together/types/finetune.py +0 -464
- together/types/images.py +0 -42
- together/types/models.py +0 -96
- together/types/rerank.py +0 -43
- together/types/videos.py +0 -69
- together/utils/api_helpers.py +0 -124
- together/version.py +0 -6
- together-1.5.34.dist-info/METADATA +0 -583
- together-1.5.34.dist-info/RECORD +0 -77
- together-1.5.34.dist-info/entry_points.txt +0 -3
- /together/{abstract → lib/cli}/__init__.py +0 -0
- /together/{cli → lib/cli/api}/__init__.py +0 -0
- /together/{cli/api/__init__.py → py.typed} +0 -0
together/types/code_interpreter_execute_params.py
@@ -0,0 +1,45 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Iterable
+from typing_extensions import Literal, Required, TypedDict
+
+__all__ = ["CodeInterpreterExecuteParams", "File"]
+
+
+class CodeInterpreterExecuteParams(TypedDict, total=False):
+    code: Required[str]
+    """Code snippet to execute."""
+
+    language: Required[Literal["python"]]
+    """Programming language for the code to execute.
+
+    Currently only supports Python, but more will be added.
+    """
+
+    files: Iterable[File]
+    """Files to upload to the session.
+
+    If present, files will be uploaded before executing the given code.
+    """
+
+    session_id: str
+    """Identifier of the current session.
+
+    Used to make follow-up calls. Requests will return an error if the session does
+    not belong to the caller or has expired.
+    """
+
+
+class File(TypedDict, total=False):
+    content: Required[str]
+
+    encoding: Required[Literal["string", "base64"]]
+    """Encoding of the file content.
+
+    Use `string` for text files such as code, and `base64` for binary files, such as
+    images.
+    """
+
+    name: Required[str]
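
A minimal usage sketch for the new request shape, using only fields defined in this hunk (the code snippet and file name are illustrative; omitting `session_id` presumably starts a fresh session):

```python
from together.types.code_interpreter_execute_params import CodeInterpreterExecuteParams

# TypedDicts are purely static shapes, so a plain dict literal satisfies them.
# Upload one text file, then run code that reads it.
params: CodeInterpreterExecuteParams = {
    "code": "print(open('data.txt').read())",
    "language": "python",
    "files": [
        {"content": "hello world", "encoding": "string", "name": "data.txt"},
    ],
}
```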
together/types/completion.py
@@ -0,0 +1,42 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .log_probs import LogProbs
+from .chat.chat_completion_usage import ChatCompletionUsage
+
+__all__ = ["Completion", "Choice", "Prompt"]
+
+
+class Choice(BaseModel):
+    finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None
+
+    logprobs: Optional[LogProbs] = None
+
+    seed: Optional[int] = None
+
+    text: Optional[str] = None
+
+
+class Prompt(BaseModel):
+    logprobs: Optional[LogProbs] = None
+
+    text: Optional[str] = None
+
+
+class Completion(BaseModel):
+    id: str
+
+    choices: List[Choice]
+
+    created: int
+
+    model: str
+
+    object: Literal["text.completion"]
+
+    usage: Optional[ChatCompletionUsage] = None
+
+    prompt: Optional[List[Prompt]] = None
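
`Completion` subclasses the Pydantic-style `BaseModel` from the new `together/_models.py`, so responses are attribute-accessed models rather than raw dicts; a small accessor sketch using only the fields above:

```python
from typing import Optional

from together.types.completion import Completion


def first_text(completion: Completion) -> Optional[str]:
    # `choices` is a plain list (possibly empty) and `text` is Optional,
    # so guard both before dereferencing.
    return completion.choices[0].text if completion.choices else None
```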
together/types/completion_chunk.py
@@ -0,0 +1,66 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List, Optional
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .tool_choice import ToolChoice
+from .chat.chat_completion_usage import ChatCompletionUsage
+
+__all__ = ["CompletionChunk", "Token", "Choice", "ChoiceDelta", "ChoiceDeltaFunctionCall"]
+
+
+class Token(BaseModel):
+    id: int
+
+    logprob: float
+
+    special: bool
+
+    text: str
+
+
+class ChoiceDeltaFunctionCall(BaseModel):
+    arguments: str
+
+    name: str
+
+
+class ChoiceDelta(BaseModel):
+    role: Literal["system", "user", "assistant", "function", "tool"]
+
+    content: Optional[str] = None
+
+    function_call: Optional[ChoiceDeltaFunctionCall] = None
+
+    reasoning: Optional[str] = None
+
+    token_id: Optional[int] = None
+
+    tool_calls: Optional[List[ToolChoice]] = None
+
+
+class Choice(BaseModel):
+    index: int
+
+    delta: Optional[ChoiceDelta] = None
+
+    text: Optional[str] = None
+
+
+class CompletionChunk(BaseModel):
+    id: str
+
+    token: Token
+
+    choices: List[Choice]
+
+    finish_reason: Optional[Literal["stop", "eos", "length", "tool_calls", "function_call"]] = None
+
+    usage: Optional[ChatCompletionUsage] = None
+
+    created: Optional[int] = None
+
+    object: Optional[Literal["completion.chunk"]] = None
+
+    seed: Optional[int] = None
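
A sketch of reassembling streamed text from these chunks, assuming only the optional per-choice `text` fragments declared above:

```python
from typing import Iterable, List

from together.types.completion_chunk import CompletionChunk


def collect_text(chunks: Iterable[CompletionChunk]) -> str:
    # Concatenate choice 0's text fragments in arrival order.
    parts: List[str] = []
    for chunk in chunks:
        if chunk.choices and chunk.choices[0].text is not None:
            parts.append(chunk.choices[0].text)
    return "".join(parts)
```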
together/types/completion_create_params.py
@@ -0,0 +1,138 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Dict, Union
+from typing_extensions import Literal, Required, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["CompletionCreateParamsBase", "CompletionCreateParamsNonStreaming", "CompletionCreateParamsStreaming"]
+
+
+class CompletionCreateParamsBase(TypedDict, total=False):
+    model: Required[
+        Union[
+            Literal[
+                "meta-llama/Llama-2-70b-hf",
+                "mistralai/Mistral-7B-v0.1",
+                "mistralai/Mixtral-8x7B-v0.1",
+                "Meta-Llama/Llama-Guard-7b",
+            ],
+            str,
+        ]
+    ]
+    """The name of the model to query.
+
+    [See all of Together AI's chat models](https://docs.together.ai/docs/serverless-models#chat-models)
+    """
+
+    prompt: Required[str]
+    """A string providing context for the model to complete."""
+
+    echo: bool
+    """If true, the response will contain the prompt.
+
+    Can be used with `logprobs` to return prompt logprobs.
+    """
+
+    frequency_penalty: float
+    """
+    A number between -2.0 and 2.0 where a positive value decreases the likelihood of
+    repeating tokens that have already been mentioned.
+    """
+
+    logit_bias: Dict[str, float]
+    """Adjusts the likelihood of specific tokens appearing in the generated output."""
+
+    logprobs: int
+    """
+    An integer between 0 and 20 of the top k tokens to return log probabilities for
+    at each generation step, instead of just the sampled token. Log probabilities
+    help assess model confidence in token predictions.
+    """
+
+    max_tokens: int
+    """The maximum number of tokens to generate."""
+
+    min_p: float
+    """A number between 0 and 1 that can be used as an alternative to top-p and top-k."""
+
+    n: int
+    """The number of completions to generate for each prompt."""
+
+    presence_penalty: float
+    """
+    A number between -2.0 and 2.0 where a positive value increases the likelihood of
+    a model talking about new topics.
+    """
+
+    repetition_penalty: float
+    """
+    A number that controls the diversity of generated text by reducing the
+    likelihood of repeated sequences. Higher values decrease repetition.
+    """
+
+    safety_model: Union[Literal["Meta-Llama/Llama-Guard-7b"], str]
+    """The name of the moderation model used to validate tokens.
+
+    Choose from the available moderation models found
+    [here](https://docs.together.ai/docs/inference-models#moderation-models).
+    """
+
+    seed: int
+    """Seed value for reproducibility."""
+
+    stop: SequenceNotStr[str]
+    """A list of string sequences that will truncate (stop) inference text output.
+
+    For example, "</s>" will stop generation as soon as the model generates the
+    given token.
+    """
+
+    temperature: float
+    """
+    A decimal number from 0-1 that determines the degree of randomness in the
+    response. A temperature less than 1 favors more correctness and is appropriate
+    for question answering or summarization. A value closer to 1 introduces more
+    randomness in the output.
+    """
+
+    top_k: int
+    """
+    An integer that's used to limit the number of choices for the next predicted
+    word or token. It specifies the maximum number of tokens to consider at each
+    step, based on their probability of occurrence. This technique helps to speed up
+    the generation process and can improve the quality of the generated text by
+    focusing on the most likely options.
+    """
+
+    top_p: float
+    """
+    A percentage (also called the nucleus parameter) that's used to dynamically
+    adjust the number of choices for each predicted token based on the cumulative
+    probabilities. It specifies a probability threshold below which all less likely
+    tokens are filtered out. This technique helps maintain diversity and generate
+    more fluent and natural-sounding text.
+    """
+
+
+class CompletionCreateParamsNonStreaming(CompletionCreateParamsBase, total=False):
+    stream: Literal[False]
+    """
+    If true, stream tokens as Server-Sent Events as the model generates them instead
+    of waiting for the full model response. The stream terminates with
+    `data: [DONE]`. If false, return a single JSON object containing the results.
+    """
+
+
+class CompletionCreateParamsStreaming(CompletionCreateParamsBase):
+    stream: Required[Literal[True]]
+    """
+    If true, stream tokens as Server-Sent Events as the model generates them instead
+    of waiting for the full model response. The stream terminates with
+    `data: [DONE]`. If false, return a single JSON object containing the results.
+    """
+
+
+CompletionCreateParams = Union[CompletionCreateParamsNonStreaming, CompletionCreateParamsStreaming]
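
The split makes `stream` optional (`Literal[False]`) in the non-streaming variant but `Required[Literal[True]]` in the streaming one, presumably to back typed overloads in `together/resources/completions.py`; a type-checking sketch of both payloads:

```python
from together.types.completion_create_params import (
    CompletionCreateParamsNonStreaming,
    CompletionCreateParamsStreaming,
)

# Non-streaming: `stream` may be omitted entirely or set to False.
blocking: CompletionCreateParamsNonStreaming = {
    "model": "mistralai/Mixtral-8x7B-v0.1",
    "prompt": "The capital of France is",
    "max_tokens": 64,
    "stop": ["</s>"],
}

# Streaming: `stream: Required[Literal[True]]` makes the key mandatory.
streaming: CompletionCreateParamsStreaming = {
    "model": "mistralai/Mixtral-8x7B-v0.1",
    "prompt": "The capital of France is",
    "stream": True,
}
```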
together/types/dedicated_endpoint.py
@@ -0,0 +1,44 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+from .autoscaling import Autoscaling
+
+__all__ = ["DedicatedEndpoint"]
+
+
+class DedicatedEndpoint(BaseModel):
+    id: str
+    """Unique identifier for the endpoint"""
+
+    autoscaling: Autoscaling
+    """Configuration for automatic scaling of the endpoint"""
+
+    created_at: datetime
+    """Timestamp when the endpoint was created"""
+
+    display_name: str
+    """Human-readable name for the endpoint"""
+
+    hardware: str
+    """The hardware configuration used for this endpoint"""
+
+    model: str
+    """The model deployed on this endpoint"""
+
+    name: str
+    """System name for the endpoint"""
+
+    object: Literal["endpoint"]
+    """The type of object"""
+
+    owner: str
+    """The owner of this endpoint"""
+
+    state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+    """Current state of the endpoint"""
+
+    type: Literal["dedicated"]
+    """The type of endpoint"""
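
A trivial helper sketch over the declared state machine; treating `STARTED` as the only serving state is an assumption, since the Literal does not say which states accept traffic:

```python
from together.types.dedicated_endpoint import DedicatedEndpoint


def is_serving(endpoint: DedicatedEndpoint) -> bool:
    # Assumption: PENDING/STARTING are transitional and
    # STOPPING/STOPPED/ERROR endpoints do not take requests.
    return endpoint.state == "STARTED"
```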
together/types/embedding.py
@@ -0,0 +1,24 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["Embedding", "Data"]
+
+
+class Data(BaseModel):
+    embedding: List[float]
+
+    index: int
+
+    object: Literal["embedding"]
+
+
+class Embedding(BaseModel):
+    data: List[Data]
+
+    model: str
+
+    object: Literal["list"]
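
A sketch for recovering the raw vectors, assuming nothing beyond the fields above; sorting by `index` guards against out-of-order `data` entries:

```python
from typing import List

from together.types.embedding import Embedding


def vectors(response: Embedding) -> List[List[float]]:
    # One vector per input, ordered by the reported index.
    return [item.embedding for item in sorted(response.data, key=lambda d: d.index)]
```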
together/types/embedding_create_params.py
@@ -0,0 +1,31 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Union
+from typing_extensions import Literal, Required, TypedDict
+
+from .._types import SequenceNotStr
+
+__all__ = ["EmbeddingCreateParams"]
+
+
+class EmbeddingCreateParams(TypedDict, total=False):
+    input: Required[Union[str, SequenceNotStr[str]]]
+    """A string providing the text for the model to embed."""
+
+    model: Required[
+        Union[
+            Literal[
+                "WhereIsAI/UAE-Large-V1",
+                "BAAI/bge-large-en-v1.5",
+                "BAAI/bge-base-en-v1.5",
+                "togethercomputer/m2-bert-80M-8k-retrieval",
+            ],
+            str,
+        ]
+    ]
+    """The name of the embedding model to use.
+
+    [See all of Together AI's embedding models](https://docs.together.ai/docs/serverless-models#embedding-models)
+    """
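
`input` takes either a single string or a `SequenceNotStr[str]` (a sequence type from the new `together/_types.py` that, going by its name, excludes bare `str`); a sketch of both forms:

```python
from together.types.embedding_create_params import EmbeddingCreateParams

single: EmbeddingCreateParams = {
    "model": "BAAI/bge-base-en-v1.5",
    "input": "Our solar system orbits the Milky Way galaxy.",
}

batch: EmbeddingCreateParams = {
    "model": "BAAI/bge-base-en-v1.5",
    # A list is fine; SequenceNotStr only rules out a plain str being
    # treated as a sequence of one-character inputs.
    "input": ["first document", "second document"],
}
```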
together/types/endpoint_create_params.py
@@ -0,0 +1,43 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, Required, TypedDict
+
+from .autoscaling_param import AutoscalingParam
+
+__all__ = ["EndpointCreateParams"]
+
+
+class EndpointCreateParams(TypedDict, total=False):
+    autoscaling: Required[AutoscalingParam]
+    """Configuration for automatic scaling of the endpoint"""
+
+    hardware: Required[str]
+    """The hardware configuration to use for this endpoint"""
+
+    model: Required[str]
+    """The model to deploy on this endpoint"""
+
+    availability_zone: str
+    """Create the endpoint in a specified availability zone (e.g., us-central-4b)"""
+
+    disable_prompt_cache: bool
+    """Whether to disable the prompt cache for this endpoint"""
+
+    disable_speculative_decoding: bool
+    """Whether to disable speculative decoding for this endpoint"""
+
+    display_name: str
+    """A human-readable name for the endpoint"""
+
+    inactive_timeout: Optional[int]
+    """
+    The number of minutes of inactivity after which the endpoint will be
+    automatically stopped. Set to null, omit or set to 0 to disable automatic
+    timeout.
+    """
+
+    state: Literal["STARTED", "STOPPED"]
+    """The desired state of the endpoint"""
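
A creation-payload sketch. The shape of `AutoscalingParam` lives in `together/types/autoscaling_param.py` and is not shown in this excerpt, so the `min_replicas`/`max_replicas` keys below are assumptions, as is the hardware id:

```python
from together.types.endpoint_create_params import EndpointCreateParams

params: EndpointCreateParams = {
    "model": "mistralai/Mixtral-8x7B-v0.1",
    "hardware": "1x_nvidia_a100_80gb_sxm",  # assumed hardware id, for illustration
    "autoscaling": {"min_replicas": 1, "max_replicas": 2},  # assumed AutoscalingParam keys
    "display_name": "my-mixtral-endpoint",
    "inactive_timeout": 30,  # minutes; None, 0, or omission disables auto-stop
}
```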
together/types/endpoint_list_avzones_response.py
@@ -0,0 +1,11 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+
+from .._models import BaseModel
+
+__all__ = ["EndpointListAvzonesResponse"]
+
+
+class EndpointListAvzonesResponse(BaseModel):
+    avzones: List[str]
together/types/endpoint_list_params.py
@@ -0,0 +1,18 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing_extensions import Literal, TypedDict
+
+__all__ = ["EndpointListParams"]
+
+
+class EndpointListParams(TypedDict, total=False):
+    mine: bool
+    """If true, return only endpoints owned by the caller"""
+
+    type: Literal["dedicated", "serverless"]
+    """Filter endpoints by type"""
+
+    usage_type: Literal["on-demand", "reserved"]
+    """Filter endpoints by usage type"""
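
All three filters are optional and combine freely; a sketch requesting only the caller's dedicated, on-demand endpoints:

```python
from together.types.endpoint_list_params import EndpointListParams

filters: EndpointListParams = {
    "mine": True,
    "type": "dedicated",
    "usage_type": "on-demand",
}
```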
together/types/endpoint_list_response.py
@@ -0,0 +1,41 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from typing import List
+from datetime import datetime
+from typing_extensions import Literal
+
+from .._models import BaseModel
+
+__all__ = ["EndpointListResponse", "Data"]
+
+
+class Data(BaseModel):
+    id: str
+    """Unique identifier for the endpoint"""
+
+    created_at: datetime
+    """Timestamp when the endpoint was created"""
+
+    model: str
+    """The model deployed on this endpoint"""
+
+    name: str
+    """System name for the endpoint"""
+
+    object: Literal["endpoint"]
+    """The type of object"""
+
+    owner: str
+    """The owner of this endpoint"""
+
+    state: Literal["PENDING", "STARTING", "STARTED", "STOPPING", "STOPPED", "ERROR"]
+    """Current state of the endpoint"""
+
+    type: Literal["serverless", "dedicated"]
+    """The type of endpoint"""
+
+
+class EndpointListResponse(BaseModel):
+    data: List[Data]
+
+    object: Literal["list"]
together/types/endpoint_update_params.py
@@ -0,0 +1,27 @@
+# File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
+
+from __future__ import annotations
+
+from typing import Optional
+from typing_extensions import Literal, TypedDict
+
+from .autoscaling_param import AutoscalingParam
+
+__all__ = ["EndpointUpdateParams"]
+
+
+class EndpointUpdateParams(TypedDict, total=False):
+    autoscaling: AutoscalingParam
+    """New autoscaling configuration for the endpoint"""
+
+    display_name: str
+    """A human-readable name for the endpoint"""
+
+    inactive_timeout: Optional[int]
+    """
+    The number of minutes of inactivity after which the endpoint will be
+    automatically stopped. Set to 0 to disable automatic timeout.
+    """
+
+    state: Literal["STARTED", "STOPPED"]
+    """The desired state of the endpoint"""
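
Every key here is optional, so an update carries only what changes; a sketch that stops an endpoint and disables its idle timeout:

```python
from together.types.endpoint_update_params import EndpointUpdateParams

params: EndpointUpdateParams = {
    "state": "STOPPED",
    "inactive_timeout": 0,  # 0 disables the automatic timeout
}
```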