together 1.5.35__py3-none-any.whl → 2.0.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/__init__.py +101 -114
- together/_base_client.py +1995 -0
- together/_client.py +1033 -0
- together/_compat.py +219 -0
- together/_constants.py +14 -0
- together/_exceptions.py +108 -0
- together/_files.py +123 -0
- together/_models.py +857 -0
- together/_qs.py +150 -0
- together/_resource.py +43 -0
- together/_response.py +830 -0
- together/_streaming.py +370 -0
- together/_types.py +260 -0
- together/_utils/__init__.py +64 -0
- together/_utils/_compat.py +45 -0
- together/_utils/_datetime_parse.py +136 -0
- together/_utils/_logs.py +25 -0
- together/_utils/_proxy.py +65 -0
- together/_utils/_reflection.py +42 -0
- together/_utils/_resources_proxy.py +24 -0
- together/_utils/_streams.py +12 -0
- together/_utils/_sync.py +58 -0
- together/_utils/_transform.py +457 -0
- together/_utils/_typing.py +156 -0
- together/_utils/_utils.py +421 -0
- together/_version.py +4 -0
- together/lib/.keep +4 -0
- together/lib/__init__.py +23 -0
- together/{cli → lib/cli}/api/endpoints.py +66 -84
- together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
- together/{cli → lib/cli}/api/files.py +20 -17
- together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
- together/{cli → lib/cli}/api/models.py +34 -27
- together/lib/cli/api/utils.py +50 -0
- together/{cli → lib/cli}/cli.py +16 -26
- together/{constants.py → lib/constants.py} +11 -24
- together/lib/resources/__init__.py +11 -0
- together/lib/resources/files.py +999 -0
- together/lib/resources/fine_tuning.py +280 -0
- together/lib/resources/models.py +35 -0
- together/lib/types/__init__.py +13 -0
- together/lib/types/error.py +9 -0
- together/lib/types/fine_tuning.py +397 -0
- together/{utils → lib/utils}/__init__.py +6 -14
- together/{utils → lib/utils}/_log.py +11 -16
- together/{utils → lib/utils}/files.py +90 -288
- together/lib/utils/serializer.py +10 -0
- together/{utils → lib/utils}/tools.py +19 -55
- together/resources/__init__.py +225 -39
- together/resources/audio/__init__.py +72 -48
- together/resources/audio/audio.py +198 -0
- together/resources/audio/speech.py +574 -128
- together/resources/audio/transcriptions.py +247 -261
- together/resources/audio/translations.py +221 -241
- together/resources/audio/voices.py +111 -41
- together/resources/batches.py +417 -0
- together/resources/chat/__init__.py +30 -21
- together/resources/chat/chat.py +102 -0
- together/resources/chat/completions.py +1063 -263
- together/resources/code_interpreter/__init__.py +33 -0
- together/resources/code_interpreter/code_interpreter.py +258 -0
- together/resources/code_interpreter/sessions.py +135 -0
- together/resources/completions.py +884 -225
- together/resources/embeddings.py +172 -68
- together/resources/endpoints.py +589 -490
- together/resources/evals.py +452 -0
- together/resources/files.py +397 -129
- together/resources/fine_tuning.py +1033 -0
- together/resources/hardware.py +181 -0
- together/resources/images.py +258 -104
- together/resources/jobs.py +214 -0
- together/resources/models.py +223 -193
- together/resources/rerank.py +190 -92
- together/resources/videos.py +286 -214
- together/types/__init__.py +66 -167
- together/types/audio/__init__.py +10 -0
- together/types/audio/speech_create_params.py +75 -0
- together/types/audio/transcription_create_params.py +54 -0
- together/types/audio/transcription_create_response.py +111 -0
- together/types/audio/translation_create_params.py +40 -0
- together/types/audio/translation_create_response.py +70 -0
- together/types/audio/voice_list_response.py +23 -0
- together/types/audio_speech_stream_chunk.py +16 -0
- together/types/autoscaling.py +13 -0
- together/types/autoscaling_param.py +15 -0
- together/types/batch_create_params.py +24 -0
- together/types/batch_create_response.py +14 -0
- together/types/batch_job.py +45 -0
- together/types/batch_list_response.py +10 -0
- together/types/chat/__init__.py +18 -0
- together/types/chat/chat_completion.py +60 -0
- together/types/chat/chat_completion_chunk.py +61 -0
- together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
- together/types/chat/chat_completion_structured_message_text_param.py +13 -0
- together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
- together/types/chat/chat_completion_usage.py +13 -0
- together/types/chat/chat_completion_warning.py +9 -0
- together/types/chat/completion_create_params.py +329 -0
- together/types/code_interpreter/__init__.py +5 -0
- together/types/code_interpreter/session_list_response.py +31 -0
- together/types/code_interpreter_execute_params.py +45 -0
- together/types/completion.py +42 -0
- together/types/completion_chunk.py +66 -0
- together/types/completion_create_params.py +138 -0
- together/types/dedicated_endpoint.py +44 -0
- together/types/embedding.py +24 -0
- together/types/embedding_create_params.py +31 -0
- together/types/endpoint_create_params.py +43 -0
- together/types/endpoint_list_avzones_response.py +11 -0
- together/types/endpoint_list_params.py +18 -0
- together/types/endpoint_list_response.py +41 -0
- together/types/endpoint_update_params.py +27 -0
- together/types/eval_create_params.py +263 -0
- together/types/eval_create_response.py +16 -0
- together/types/eval_list_params.py +21 -0
- together/types/eval_list_response.py +10 -0
- together/types/eval_status_response.py +100 -0
- together/types/evaluation_job.py +139 -0
- together/types/execute_response.py +108 -0
- together/types/file_delete_response.py +13 -0
- together/types/file_list.py +12 -0
- together/types/file_purpose.py +9 -0
- together/types/file_response.py +31 -0
- together/types/file_type.py +7 -0
- together/types/fine_tuning_cancel_response.py +194 -0
- together/types/fine_tuning_content_params.py +24 -0
- together/types/fine_tuning_delete_params.py +11 -0
- together/types/fine_tuning_delete_response.py +12 -0
- together/types/fine_tuning_list_checkpoints_response.py +21 -0
- together/types/fine_tuning_list_events_response.py +12 -0
- together/types/fine_tuning_list_response.py +199 -0
- together/types/finetune_event.py +41 -0
- together/types/finetune_event_type.py +33 -0
- together/types/finetune_response.py +177 -0
- together/types/hardware_list_params.py +16 -0
- together/types/hardware_list_response.py +58 -0
- together/types/image_data_b64.py +15 -0
- together/types/image_data_url.py +15 -0
- together/types/image_file.py +23 -0
- together/types/image_generate_params.py +85 -0
- together/types/job_list_response.py +47 -0
- together/types/job_retrieve_response.py +43 -0
- together/types/log_probs.py +18 -0
- together/types/model_list_response.py +10 -0
- together/types/model_object.py +42 -0
- together/types/model_upload_params.py +36 -0
- together/types/model_upload_response.py +23 -0
- together/types/rerank_create_params.py +36 -0
- together/types/rerank_create_response.py +36 -0
- together/types/tool_choice.py +23 -0
- together/types/tool_choice_param.py +23 -0
- together/types/tools_param.py +23 -0
- together/types/training_method_dpo.py +22 -0
- together/types/training_method_sft.py +18 -0
- together/types/video_create_params.py +86 -0
- together/types/video_create_response.py +10 -0
- together/types/video_job.py +57 -0
- together-2.0.0a6.dist-info/METADATA +729 -0
- together-2.0.0a6.dist-info/RECORD +165 -0
- {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
- together-2.0.0a6.dist-info/entry_points.txt +2 -0
- {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
- together/abstract/api_requestor.py +0 -770
- together/cli/api/chat.py +0 -298
- together/cli/api/completions.py +0 -119
- together/cli/api/images.py +0 -93
- together/cli/api/utils.py +0 -139
- together/client.py +0 -186
- together/error.py +0 -194
- together/filemanager.py +0 -635
- together/legacy/__init__.py +0 -0
- together/legacy/base.py +0 -27
- together/legacy/complete.py +0 -93
- together/legacy/embeddings.py +0 -27
- together/legacy/files.py +0 -146
- together/legacy/finetune.py +0 -177
- together/legacy/images.py +0 -27
- together/legacy/models.py +0 -44
- together/resources/batch.py +0 -165
- together/resources/code_interpreter.py +0 -82
- together/resources/evaluation.py +0 -808
- together/resources/finetune.py +0 -1388
- together/together_response.py +0 -50
- together/types/abstract.py +0 -26
- together/types/audio_speech.py +0 -311
- together/types/batch.py +0 -54
- together/types/chat_completions.py +0 -210
- together/types/code_interpreter.py +0 -57
- together/types/common.py +0 -67
- together/types/completions.py +0 -107
- together/types/embeddings.py +0 -35
- together/types/endpoints.py +0 -123
- together/types/error.py +0 -16
- together/types/evaluation.py +0 -93
- together/types/files.py +0 -93
- together/types/finetune.py +0 -465
- together/types/images.py +0 -42
- together/types/models.py +0 -96
- together/types/rerank.py +0 -43
- together/types/videos.py +0 -69
- together/utils/api_helpers.py +0 -124
- together/version.py +0 -6
- together-1.5.35.dist-info/METADATA +0 -583
- together-1.5.35.dist-info/RECORD +0 -77
- together-1.5.35.dist-info/entry_points.txt +0 -3
- /together/{abstract → lib/cli}/__init__.py +0 -0
- /together/{cli → lib/cli/api}/__init__.py +0 -0
- /together/{cli/api/__init__.py → py.typed} +0 -0
|
@@ -1,770 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import email.utils
|
|
5
|
-
import json
|
|
6
|
-
import sys
|
|
7
|
-
import threading
|
|
8
|
-
import time
|
|
9
|
-
from json import JSONDecodeError
|
|
10
|
-
from random import random
|
|
11
|
-
from typing import (
|
|
12
|
-
Any,
|
|
13
|
-
AsyncContextManager,
|
|
14
|
-
AsyncGenerator,
|
|
15
|
-
Dict,
|
|
16
|
-
Iterator,
|
|
17
|
-
Tuple,
|
|
18
|
-
overload,
|
|
19
|
-
)
|
|
20
|
-
from urllib.parse import urlencode, urlsplit, urlunsplit
|
|
21
|
-
|
|
22
|
-
import aiohttp
|
|
23
|
-
import requests
|
|
24
|
-
from tqdm.utils import CallbackIOWrapper
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
if sys.version_info >= (3, 8):
|
|
28
|
-
from typing import Literal
|
|
29
|
-
else:
|
|
30
|
-
from typing_extensions import Literal
|
|
31
|
-
|
|
32
|
-
import together
|
|
33
|
-
from together import error, utils
|
|
34
|
-
from together.constants import (
|
|
35
|
-
BASE_URL,
|
|
36
|
-
INITIAL_RETRY_DELAY,
|
|
37
|
-
MAX_CONNECTION_RETRIES,
|
|
38
|
-
MAX_RETRIES,
|
|
39
|
-
MAX_RETRY_DELAY,
|
|
40
|
-
MAX_SESSION_LIFETIME_SECS,
|
|
41
|
-
TIMEOUT_SECS,
|
|
42
|
-
)
|
|
43
|
-
from together.together_response import TogetherResponse
|
|
44
|
-
from together.types import TogetherClient, TogetherRequest
|
|
45
|
-
from together.types.error import TogetherErrorResponse
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
# Has one attribute per thread, 'session'.
|
|
49
|
-
_thread_context = threading.local()
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
def _build_api_url(url: str, query: str) -> str:
|
|
53
|
-
scheme, netloc, path, base_query, fragment = urlsplit(url)
|
|
54
|
-
|
|
55
|
-
if base_query:
|
|
56
|
-
query = "%s&%s" % (base_query, query)
|
|
57
|
-
|
|
58
|
-
return str(urlunsplit((scheme, netloc, path, query, fragment)))
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
def _make_session(max_retries: int | None = None) -> requests.Session:
    """Return the ``requests.Session`` to use for synchronous API calls.

    A truthy ``together.requestssession`` takes precedence: it is returned
    as-is when it already is a ``requests.Session``, otherwise it is called
    and its result is returned. Without such an override a new session is
    created whose ``https://`` adapter is built with ``max_retries``.
    """
    if together.requestssession:
        if not isinstance(together.requestssession, requests.Session):
            # Not a session instance: treat the override as a factory.
            return together.requestssession()
        return together.requestssession

    session = requests.Session()
    adapter = requests.adapters.HTTPAdapter(max_retries=max_retries)
    session.mount("https://", adapter)
    return session
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def parse_stream_helper(line: bytes) -> str | None:
    """Extract the payload of one server-sent-event ``data:`` line.

    Args:
        line: A raw line from the response body.

    Returns:
        The UTF-8 decoded text after the ``data:`` prefix (one optional
        space after the colon is dropped as well), or ``None`` when the
        line is empty, is not a ``data:`` line, or is the ``[DONE]``
        sentinel (compared case-insensitively, ignoring surrounding
        whitespace).
    """
    if not line or not line.startswith(b"data:"):
        return None

    # Prefer the longer "data: " prefix so a single separator space is removed.
    prefix = b"data: " if line.startswith(b"data: ") else b"data:"
    payload = line[len(prefix):]

    if payload.strip().upper() == b"[DONE]":
        # End-of-stream marker: nothing to hand to the caller.
        return None
    return payload.decode("utf-8")
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
def parse_stream(rbody: Iterator[bytes]) -> Iterator[str]:
    """Yield the decoded payload of every line in ``rbody`` that ``parse_stream_helper`` accepts.

    Lines for which ``parse_stream_helper`` returns ``None`` are skipped.
    """
    for raw_line in rbody:
        payload = parse_stream_helper(raw_line)
        if payload is None:
            continue
        yield payload
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
async def parse_stream_async(rbody: aiohttp.StreamReader) -> AsyncGenerator[str, Any]:
    """Asynchronously yield the decoded payload of every line in ``rbody`` that ``parse_stream_helper`` accepts.

    Lines for which ``parse_stream_helper`` returns ``None`` are skipped.
    """
    async for raw_line in rbody:
        payload = parse_stream_helper(raw_line)
        if payload is None:
            continue
        yield payload
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
class APIRequestor:
|
|
105
|
-
def __init__(self, client: TogetherClient):
    """Copy connection settings from ``client``, filling in defaults for unset values."""
    self.api_base = client.base_url or BASE_URL
    self.api_key = client.api_key or utils.default_api_key()
    self.supplied_headers = client.supplied_headers
    self.timeout = client.timeout or TIMEOUT_SECS

    # Only ``None`` selects the default, so an explicit 0 is kept as given.
    if client.max_retries is None:
        self.retries = MAX_RETRIES
    else:
        self.retries = client.max_retries
|
|
111
|
-
|
|
112
|
-
def _parse_retry_after_header(
|
|
113
|
-
self, response_headers: Dict[str, Any] | None = None
|
|
114
|
-
) -> float | None:
|
|
115
|
-
"""
|
|
116
|
-
Returns a float of the number of seconds (not milliseconds)
|
|
117
|
-
to wait after retrying, or None if unspecified.
|
|
118
|
-
|
|
119
|
-
About the Retry-After header:
|
|
120
|
-
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After
|
|
121
|
-
See also
|
|
122
|
-
https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Retry-After#syntax
|
|
123
|
-
"""
|
|
124
|
-
if not response_headers:
|
|
125
|
-
return None
|
|
126
|
-
|
|
127
|
-
# First, try the non-standard `retry-after-ms` header for milliseconds,
|
|
128
|
-
# which is more precise than integer-seconds `retry-after`
|
|
129
|
-
try:
|
|
130
|
-
retry_ms_header = response_headers.get("retry-after-ms", None)
|
|
131
|
-
return float(retry_ms_header) / 1000
|
|
132
|
-
except (TypeError, ValueError):
|
|
133
|
-
pass
|
|
134
|
-
|
|
135
|
-
# Next, try parsing `retry-after` header as seconds (allowing nonstandard floats).
|
|
136
|
-
retry_header = str(response_headers.get("retry-after"))
|
|
137
|
-
try:
|
|
138
|
-
# note: the spec indicates that this should only ever be an integer
|
|
139
|
-
# but if someone sends a float there's no reason for us to not respect it
|
|
140
|
-
return float(retry_header)
|
|
141
|
-
except (TypeError, ValueError):
|
|
142
|
-
pass
|
|
143
|
-
|
|
144
|
-
# Last, try parsing `retry-after` as a date.
|
|
145
|
-
retry_date_tuple = email.utils.parsedate_tz(retry_header)
|
|
146
|
-
if retry_date_tuple is None:
|
|
147
|
-
return None
|
|
148
|
-
|
|
149
|
-
retry_date = email.utils.mktime_tz(retry_date_tuple)
|
|
150
|
-
return float(retry_date - time.time())
|
|
151
|
-
|
|
152
|
-
def _calculate_retry_timeout(
|
|
153
|
-
self,
|
|
154
|
-
remaining_retries: int,
|
|
155
|
-
response_headers: Dict[str, Any] | None = None,
|
|
156
|
-
) -> float:
|
|
157
|
-
# If the API asks us to wait a certain amount of time (and it's a reasonable amount), just do what it says.
|
|
158
|
-
retry_after = self._parse_retry_after_header(response_headers)
|
|
159
|
-
if retry_after is not None and 0 < retry_after <= 60:
|
|
160
|
-
return retry_after
|
|
161
|
-
|
|
162
|
-
nb_retries = self.retries - remaining_retries
|
|
163
|
-
|
|
164
|
-
# Apply exponential backoff, but not more than the max.
|
|
165
|
-
sleep_seconds = min(INITIAL_RETRY_DELAY * pow(2.0, nb_retries), MAX_RETRY_DELAY)
|
|
166
|
-
|
|
167
|
-
# Apply some jitter, plus-or-minus half a second.
|
|
168
|
-
jitter = 1 - 0.25 * random()
|
|
169
|
-
timeout = sleep_seconds * jitter
|
|
170
|
-
return timeout if timeout >= 0 else 0
|
|
171
|
-
|
|
172
|
-
def _retry_request(
    self,
    options: TogetherRequest,
    remaining_retries: int,
    response_headers: Dict[str, Any] | None,
    *,
    stream: bool,
    request_timeout: float | Tuple[float, float] | None = None,
) -> requests.Response:
    """Wait for the computed backoff delay, then re-issue the request.

    Args:
        options: The request to repeat.
        remaining_retries: Retries available before this attempt; one is
            consumed here.
        response_headers: Headers of the failed response, used to look for
            a server-requested delay.
        stream: Passed through to ``request_raw``.
        request_timeout: Passed through to ``request_raw``.

    Returns:
        Whatever ``request_raw`` returns for the repeated request.
    """
    remaining = remaining_retries - 1
    if remaining == 1:
        utils.log_debug("1 retry left")
    else:
        utils.log_debug(f"{remaining} retries left")

    timeout = self._calculate_retry_timeout(remaining, response_headers)
    # Actually emit the message so the pause is visible in debug output.
    utils.log_debug(f"Retrying request to {options.url} in {timeout:f} seconds")

    # In a synchronous context we are blocking the entire thread. It is up to
    # the library user to run the client in a different thread if necessary.
    time.sleep(timeout)

    return self.request_raw(
        options=options,
        stream=stream,
        request_timeout=request_timeout,
        remaining_retries=remaining,
    )
|
|
200
|
-
|
|
201
|
-
@overload
|
|
202
|
-
def request(
|
|
203
|
-
self,
|
|
204
|
-
options: TogetherRequest,
|
|
205
|
-
stream: Literal[True],
|
|
206
|
-
remaining_retries: int | None = ...,
|
|
207
|
-
request_timeout: float | Tuple[float, float] | None = ...,
|
|
208
|
-
) -> Tuple[Iterator[TogetherResponse], bool, str]:
|
|
209
|
-
pass
|
|
210
|
-
|
|
211
|
-
@overload
|
|
212
|
-
def request(
|
|
213
|
-
self,
|
|
214
|
-
options: TogetherRequest,
|
|
215
|
-
stream: Literal[False] = ...,
|
|
216
|
-
remaining_retries: int | None = ...,
|
|
217
|
-
request_timeout: float | Tuple[float, float] | None = ...,
|
|
218
|
-
) -> Tuple[TogetherResponse, bool, str]:
|
|
219
|
-
pass
|
|
220
|
-
|
|
221
|
-
@overload
|
|
222
|
-
def request(
|
|
223
|
-
self,
|
|
224
|
-
options: TogetherRequest,
|
|
225
|
-
stream: bool = ...,
|
|
226
|
-
remaining_retries: int | None = ...,
|
|
227
|
-
request_timeout: float | Tuple[float, float] | None = ...,
|
|
228
|
-
) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool, str]:
|
|
229
|
-
pass
|
|
230
|
-
|
|
231
|
-
def request(
|
|
232
|
-
self,
|
|
233
|
-
options: TogetherRequest,
|
|
234
|
-
stream: bool = False,
|
|
235
|
-
remaining_retries: int | None = None,
|
|
236
|
-
request_timeout: float | Tuple[float, float] | None = None,
|
|
237
|
-
) -> Tuple[
|
|
238
|
-
TogetherResponse | Iterator[TogetherResponse],
|
|
239
|
-
bool,
|
|
240
|
-
str | None,
|
|
241
|
-
]:
|
|
242
|
-
result = self.request_raw(
|
|
243
|
-
options=options,
|
|
244
|
-
remaining_retries=remaining_retries or self.retries,
|
|
245
|
-
stream=stream,
|
|
246
|
-
request_timeout=request_timeout,
|
|
247
|
-
)
|
|
248
|
-
|
|
249
|
-
resp, got_stream = self._interpret_response(result, stream)
|
|
250
|
-
return resp, got_stream, self.api_key
|
|
251
|
-
|
|
252
|
-
@overload
async def arequest(
    self,
    options: TogetherRequest,
    stream: Literal[True],
    request_timeout: float | Tuple[float, float] | None = ...,
) -> Tuple[AsyncGenerator[TogetherResponse, None], bool, str]:
    pass

@overload
async def arequest(
    self,
    options: TogetherRequest,
    *,
    stream: Literal[True],
    request_timeout: float | Tuple[float, float] | None = ...,
) -> Tuple[AsyncGenerator[TogetherResponse, None], bool, str]:
    pass

@overload
async def arequest(
    self,
    options: TogetherRequest,
    stream: Literal[False] = ...,
    request_timeout: float | Tuple[float, float] | None = ...,
) -> Tuple[TogetherResponse, bool, str]:
    pass

@overload
async def arequest(
    self,
    options: TogetherRequest,
    stream: bool = ...,
    request_timeout: float | Tuple[float, float] | None = ...,
) -> Tuple[TogetherResponse | AsyncGenerator[TogetherResponse, None], bool, str]:
    pass

async def arequest(
    self,
    options: TogetherRequest,
    stream: bool = False,
    request_timeout: float | Tuple[float, float] | None = None,
) -> Tuple[TogetherResponse | AsyncGenerator[TogetherResponse, None], bool, str]:
    """Send a request asynchronously and interpret the response.

    A fresh ``AioHTTPSession`` context is entered for the call. For a
    non-streamed response the underlying response is released and the
    context exited before returning. For a streamed response both happen
    when the returned generator finishes or is closed.

    Args:
        options: Description of the request to send.
        stream: Whether a streamed response is requested.
        request_timeout: Per-request timeout override.

    Returns:
        A tuple ``(response, got_stream, api_key)``; ``response`` is an
        async generator when ``got_stream`` is true.
    """
    ctx = AioHTTPSession()
    session = await ctx.__aenter__()
    result = None
    try:
        result = await self.arequest_raw(
            options,
            session,
            request_timeout=request_timeout,
        )
        resp, got_stream = await self._interpret_async_response(result, stream)
    except BaseException:
        # BaseException rather than Exception: task cancellation raises
        # asyncio.CancelledError, which is not an Exception subclass on
        # Python 3.8+, and must still release the response and the session.
        if result is not None:
            result.release()
        await ctx.__aexit__(None, None, None)
        raise

    if got_stream:

        async def wrap_resp() -> AsyncGenerator[TogetherResponse, None]:
            assert isinstance(resp, AsyncGenerator)
            try:
                async for r in resp:
                    yield r
            finally:
                # Close the request before exiting the session context. This
                # must happen here because the stream may be abandoned before
                # it is fully consumed.
                result.release()
                await ctx.__aexit__(None, None, None)

        return wrap_resp(), got_stream, self.api_key  # type: ignore
    else:
        # Close the request before exiting the session context.
        result.release()
        await ctx.__aexit__(None, None, None)
        return resp, got_stream, self.api_key  # type: ignore
|
|
330
|
-
|
|
331
|
-
@classmethod
def handle_error_response(
    cls,
    resp: TogetherResponse,
    rcode: int,
    stream_error: bool = False,
) -> Exception:
    """Build the exception that corresponds to an API error response.

    Args:
        resp: The parsed response; ``resp.data`` is expected to be a dict
            with a dict under the ``"error"`` key.
        rcode: HTTP status code of the response.
        stream_error: Whether the error was encountered while streaming.

    Returns:
        An exception instance chosen by status code. It is returned, not
        raised.

    Raises:
        error.JSONError: If the response body does not have the expected
            shape.
    """

    def invalid_response() -> Exception:
        return error.JSONError(
            "Invalid response object from API: %r (HTTP response code "
            "was %d)" % (resp.data, rcode),
            http_status=rcode,
        )

    # Validate explicitly instead of with `assert`: assertions vanish under
    # `python -O` and raise AssertionError, which would bypass JSONError.
    error_resp = resp.data.get("error") if isinstance(resp.data, dict) else None
    if not isinstance(error_resp, dict):
        raise invalid_response()

    try:
        error_data = TogetherErrorResponse(**error_resp)
    except (KeyError, TypeError) as exc:
        raise invalid_response() from exc

    utils.log_info(
        "Together API error received",
        error_code=error_data.code,
        error_type=error_data.type_,
        error_message=error_data.message,
        error_param=error_data.param,
        stream_error=stream_error,
    )

    # Keyword arguments shared by every exception built below.
    details = {
        "http_status": rcode,
        "headers": resp._headers,
        "request_id": resp.request_id,
    }

    # Rate limits were previously coded as 400's with code 'rate_limit'
    if rcode == 429:
        return error.RateLimitError(error_data, **details)
    if rcode in [400, 403, 404, 415, 422]:
        return error.InvalidRequestError(error_data, **details)
    if rcode == 401:
        return error.AuthenticationError(error_data, **details)
    if stream_error:
        parts = [error_data.message, "(Error occurred while streaming.)"]
        message = " ".join([p for p in parts if p is not None])
        return error.APIError(message, **details)
    return error.APIError(error_data, **details)
|
|
400
|
-
|
|
401
|
-
@classmethod
|
|
402
|
-
def _validate_headers(
|
|
403
|
-
cls, supplied_headers: Dict[str, str] | None
|
|
404
|
-
) -> Dict[str, str]:
|
|
405
|
-
headers: Dict[str, str] = {}
|
|
406
|
-
if supplied_headers is None:
|
|
407
|
-
return headers
|
|
408
|
-
|
|
409
|
-
if not isinstance(supplied_headers, dict):
|
|
410
|
-
raise TypeError("Headers must be a dictionary")
|
|
411
|
-
|
|
412
|
-
for k, v in supplied_headers.items():
|
|
413
|
-
if not isinstance(k, str):
|
|
414
|
-
raise TypeError("Header keys must be strings")
|
|
415
|
-
if not isinstance(v, str):
|
|
416
|
-
raise TypeError("Header values must be strings")
|
|
417
|
-
headers[k] = v
|
|
418
|
-
|
|
419
|
-
# NOTE: It is possible to do more validation of the headers, but a request could always
|
|
420
|
-
# be made to the API manually with invalid headers, so we need to handle them server side.
|
|
421
|
-
|
|
422
|
-
return headers
|
|
423
|
-
|
|
424
|
-
def _prepare_request_raw(
    self,
    options: TogetherRequest,
    absolute: bool = False,
) -> Tuple[str, Dict[str, str], Dict[str, str] | CallbackIOWrapper | bytes | None]:
    """Work out the URL, headers and body for ``options``.

    GET and DELETE requests carry their non-``None`` params in the query
    string. POST, PUT and PATCH requests send params either unchanged
    (when files or overridden headers are involved) or as a JSON-encoded
    body with a JSON content type.

    Args:
        options: Description of the request.
        absolute: When true, ``options.url`` is used as-is instead of
            being appended to ``self.api_base``.

    Returns:
        A tuple ``(url, headers, body)``; ``body`` may be ``None``.

    Raises:
        error.APIConnectionError: If the HTTP method is not recognised.
    """
    abs_url = options.url if absolute else "%s%s" % (self.api_base, options.url)
    headers = self._validate_headers(options.headers or self.supplied_headers)

    method = options.method.lower()
    form_data = None
    json_body = None

    if method in ("get", "delete"):
        if options.params:
            pairs = [(k, v) for k, v in options.params.items() if v is not None]
            abs_url = _build_api_url(abs_url, urlencode(pairs))
    elif method in {"post", "put", "patch"}:
        if options.params:
            if options.files or options.override_headers:
                # Params are handed over unencoded in this case.
                form_data = options.params
            else:
                json_body = json.dumps(options.params).encode()
                headers["Content-Type"] = "application/json"
    else:
        raise error.APIConnectionError(
            "Unrecognized HTTP method %r. This may indicate a bug in the "
            "Together SDK. Please contact us by filling out https://www.together.ai/contact for "
            "assistance." % (options.method,)
        )

    if not options.override_headers:
        headers = utils.get_headers(options.method, self.api_key, headers)

    body = form_data or json_body

    # NOTE(review): the headers are logged verbatim. If utils.get_headers
    # adds the API key to them, it ends up in debug logs -- confirm.
    utils.log_debug(
        "Request to Together API",
        method=options.method,
        path=abs_url,
        post_data=body,
        headers=json.dumps(headers),
    )

    return abs_url, headers, body
|
|
466
|
-
|
|
467
|
-
def request_raw(
    self,
    options: TogetherRequest,
    remaining_retries: int,
    *,
    stream: bool = False,
    request_timeout: float | Tuple[float, float] | None = None,
    absolute: bool = False,
) -> requests.Response:
    """Send one synchronous HTTP request, retrying on transient failures.

    The per-thread session is created on first use and replaced once it is
    ``MAX_SESSION_LIFETIME_SECS`` old. Timeouts, other request exceptions,
    5xx responses and 429 responses are retried through
    ``_retry_request`` while ``remaining_retries`` is positive.

    Args:
        options: Description of the request.
        remaining_retries: How many more retries may be attempted.
        stream: Whether the response body should be streamed.
        request_timeout: Timeout override; falls back to ``self.timeout``.
        absolute: Whether ``options.url`` is already an absolute URL.

    Returns:
        The final ``requests.Response``. This may be an error response when
        no retries are left or the status is not retryable.

    Raises:
        error.Timeout: The request timed out and no retries are left.
        error.APIConnectionError: Another request error occurred and no
            retries are left.
    """
    # NOTE(review): `absolute` is not forwarded by _retry_request, so a
    # retried request is prepared again with absolute=False.
    abs_url, headers, data = self._prepare_request_raw(options, absolute)

    if not hasattr(_thread_context, "session"):
        _thread_context.session = _make_session(MAX_CONNECTION_RETRIES)
        _thread_context.session_create_time = time.time()
    elif (
        time.time() - getattr(_thread_context, "session_create_time", 0)
        >= MAX_SESSION_LIFETIME_SECS
    ):
        # Recycle sessions that have exceeded their maximum lifetime.
        _thread_context.session.close()
        _thread_context.session = _make_session(MAX_CONNECTION_RETRIES)
        _thread_context.session_create_time = time.time()

    try:
        result = _thread_context.session.request(
            options.method,
            abs_url,
            headers=headers,
            data=data,
            files=options.files,
            stream=stream,
            timeout=request_timeout or self.timeout,
            proxies=_thread_context.session.proxies,
            allow_redirects=options.allow_redirects,
        )
    except requests.exceptions.Timeout as e:
        utils.log_debug("Encountered requests.exceptions.Timeout")

        if remaining_retries > 0:
            # No response was received, so there are no headers to consult.
            return self._retry_request(
                options,
                remaining_retries=remaining_retries,
                response_headers={},
                stream=stream,
                request_timeout=request_timeout,
            )

        raise error.Timeout("Request timed out: {}".format(e)) from e
    except requests.exceptions.RequestException as e:
        utils.log_debug("Encountered requests.exceptions.RequestException")

        if remaining_retries > 0:
            return self._retry_request(
                options,
                remaining_retries=remaining_retries,
                response_headers={},
                stream=stream,
                request_timeout=request_timeout,
            )

        raise error.APIConnectionError(
            "Error communicating with API: {}".format(e)
        ) from e

    # retry on 5XX error or rate-limit
    if 500 <= result.status_code < 600 or result.status_code == 429:
        utils.log_debug(
            f"Encountered requests.exceptions.HTTPError. Error code: {result.status_code}"
        )

        if remaining_retries > 0:
            # Lower-case the names: a plain dict loses the case-insensitive
            # lookup of the response headers, and the retry logic looks for
            # "retry-after" / "retry-after-ms".
            retry_headers = {
                name.lower(): value for name, value in result.headers.items()
            }
            # Release the failed response's connection before retrying;
            # with stream=True it would otherwise stay checked out.
            result.close()
            return self._retry_request(
                options,
                remaining_retries=remaining_retries,
                response_headers=retry_headers,
                stream=stream,
                request_timeout=request_timeout,
            )

    utils.log_debug(
        "Together API response",
        path=abs_url,
        response_code=result.status_code,
        processing_ms=result.headers.get("x-total-time"),
        request_id=result.headers.get("CF-RAY"),
    )

    return result
|
|
565
|
-
|
|
566
|
-
async def arequest_raw(
    self,
    options: TogetherRequest,
    session: aiohttp.ClientSession,
    *,
    request_timeout: float | Tuple[float, float] | None = None,
    absolute: bool = False,
) -> aiohttp.ClientResponse:
    """Issue ``options`` through ``session`` and hand back the raw response.

    Args:
        options: Request description; its ``method``, ``files`` and
            ``allow_redirects`` attributes are read here, and the whole
            object is passed to ``_prepare_request_raw``.
        session: aiohttp session used to perform the request.
        request_timeout: Either a single value used as the total timeout,
            or a tuple whose first item becomes the connect timeout and
            whose second item becomes the total timeout. A falsy value
            falls back to ``self.timeout``.
        absolute: Forwarded unchanged to ``_prepare_request_raw``.

    Returns:
        The ``aiohttp.ClientResponse`` returned by ``session.request``.

    Raises:
        error.Timeout: On ``aiohttp.ServerTimeoutError`` or
            ``asyncio.TimeoutError``.
        error.APIConnectionError: On any other ``aiohttp.ClientError``.
    """
    abs_url, headers, data = self._prepare_request_raw(options, absolute)

    if not isinstance(request_timeout, tuple):
        timeout = aiohttp.ClientTimeout(total=request_timeout or self.timeout)
    else:
        connect_limit = request_timeout[0]
        total_limit = request_timeout[1]
        timeout = aiohttp.ClientTimeout(connect=connect_limit, total=total_limit)

    if options.files:
        # Borrow requests' multipart encoder so file uploads are encoded by
        # the same helper on the aiohttp path; it returns the body and the
        # matching Content-Type header value.
        encoded = requests.models.RequestEncodingMixin._encode_files(  # type: ignore
            options.files, data
        )
        data, content_type = encoded
        headers["Content-Type"] = content_type

    try:
        response = await session.request(
            method=options.method,
            url=abs_url,
            headers=headers,
            data=data,
            timeout=timeout,
            allow_redirects=options.allow_redirects,
        )
        response_headers = response.headers
        utils.log_debug(
            "Together API response",
            path=abs_url,
            response_code=response.status,
            processing_ms=response_headers.get("x-total-time"),
            request_id=response_headers.get("CF-RAY"),
        )
        # Don't read the whole stream for debug logging unless necessary.
        if together.log == "debug":
            utils.log_debug(
                "API response body", body=response.content, headers=response.headers
            )
        return response
    except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as exc:
        raise error.Timeout("Request timed out") from exc
    except aiohttp.ClientError as exc:
        raise error.APIConnectionError("Error communicating with Together") from exc
|
|
616
|
-
|
|
617
|
-
def _interpret_response(
    self, result: requests.Response, stream: bool
) -> Tuple[TogetherResponse | Iterator[TogetherResponse], bool]:
    """Convert a ``requests`` response into ``TogetherResponse`` object(s).

    Args:
        result: The HTTP response to interpret.
        stream: Whether the caller asked for a streamed response.

    Returns:
        A ``(response, is_stream)`` pair. ``response`` is a lazy iterator of
        ``TogetherResponse`` objects when ``is_stream`` is True (server-sent
        events, or binary chunks of up to 8192 bytes), otherwise a single
        ``TogetherResponse``.
    """
    content_type = result.headers.get("Content-Type", "")
    # Media types are case-insensitive and may carry parameters
    # ("audio/wav; codecs=1"). Compare only the bare type so binary payloads
    # are still recognised instead of being pushed through a UTF-8 decode.
    media_type = content_type.split(";", 1)[0].strip().lower()
    is_binary = media_type in (
        "audio/wav",
        "audio/mpeg",
        "application/octet-stream",
    )

    if stream and "text/event-stream" in content_type:
        # SSE format streaming: interpret each parsed line lazily.
        return (
            self._interpret_response_line(
                line, result.status_code, result.headers, stream=True
            )
            for line in parse_stream(result.iter_lines())
        ), True

    if stream and is_binary:
        # Binary audio streaming - return chunks as binary data.
        def binary_stream_generator() -> Iterator[TogetherResponse]:
            for chunk in result.iter_content(chunk_size=8192):
                if chunk:  # Skip empty chunks
                    yield TogetherResponse(chunk, dict(result.headers))

        return binary_stream_generator(), True

    # Non-streaming response: keep binary bodies as bytes, decode the rest.
    content: str | bytes = (
        result.content if is_binary else result.content.decode("utf-8")
    )
    return (
        self._interpret_response_line(
            content,
            result.status_code,
            result.headers,
            stream=False,
        ),
        False,
    )
|
|
658
|
-
|
|
659
|
-
async def _interpret_async_response(
    self, result: aiohttp.ClientResponse, stream: bool
) -> (
    tuple[AsyncGenerator[TogetherResponse, None], bool]
    | tuple[TogetherResponse, bool]
):
    """Convert an ``aiohttp`` response into ``TogetherResponse`` object(s).

    Args:
        result: The HTTP response to interpret.
        stream: Whether the caller asked for a streamed response.

    Returns:
        A ``(response, is_stream)`` pair. ``response`` is an async generator
        of ``TogetherResponse`` objects when ``is_stream`` is True
        (server-sent events, or binary chunks of up to 8192 bytes),
        otherwise a single ``TogetherResponse``.

    Raises:
        error.Timeout: If reading the body times out.
        error.APIConnectionError: If reading the body fails with any other
            ``aiohttp.ClientError``.
    """
    content_type = result.headers.get("Content-Type", "")
    # Media types are case-insensitive and may carry parameters
    # ("audio/wav; codecs=1"). Compare only the bare type so binary payloads
    # are still recognised instead of being pushed through a UTF-8 decode.
    media_type = content_type.split(";", 1)[0].strip().lower()
    is_binary = media_type in (
        "audio/wav",
        "audio/mpeg",
        "application/octet-stream",
    )

    if stream and "text/event-stream" in content_type:
        # SSE format streaming: interpret each parsed line lazily.
        return (
            self._interpret_response_line(
                line, result.status, result.headers, stream=True
            )
            async for line in parse_stream_async(result.content)
        ), True

    if stream and is_binary:
        # Binary audio streaming - return chunks as binary data.
        async def binary_stream_generator() -> (
            AsyncGenerator[TogetherResponse, None]
        ):
            async for chunk in result.content.iter_chunked(8192):
                if chunk:  # Skip empty chunks
                    yield TogetherResponse(chunk, dict(result.headers))

        return binary_stream_generator(), True

    # Non-streaming response
    try:
        content = await result.read()
    except (aiohttp.ServerTimeoutError, asyncio.TimeoutError) as e:
        raise error.Timeout("Request timed out") from e
    except aiohttp.ClientError as e:
        utils.log_warn(e, body=result.content)
        # There is no body to interpret after a failed read. Raise a typed
        # error rather than falling through to an unbound ``content``.
        raise error.APIConnectionError("Error communicating with Together") from e

    # Binary content stays as bytes; text content is decoded to a string.
    response_content: str | bytes = (
        content if is_binary else content.decode("utf-8")
    )

    return (
        self._interpret_response_line(
            response_content,
            result.status,
            result.headers,
            stream=False,
        ),
        False,
    )
|
|
715
|
-
|
|
716
|
-
def _interpret_response_line(
    self, rbody: str | bytes, rcode: int, rheaders: Any, stream: bool
) -> TogetherResponse:
    """Build a ``TogetherResponse`` from one body (or one streamed line).

    Args:
        rbody: Body text, or raw bytes which are passed through unparsed.
        rcode: HTTP status code of the response.
        rheaders: Response headers; must support ``.get``.
        stream: Forwarded to ``handle_error_response`` as ``stream_error``.

    Returns:
        The wrapped payload: ``{}`` for a 204, the raw bytes for a bytes
        body, ``{"message": rbody}`` for ``text/plain``, otherwise the
        JSON-decoded body.

    Raises:
        error.ServiceUnavailableError: When ``rcode`` is 503.
        error.APIError: When the body cannot be decoded as JSON.
        Exception: Whatever ``handle_error_response`` returns, for any
            status outside the 2xx range.
    """
    # HTTP 204 response code does not have any content in the body.
    if rcode == 204:
        return TogetherResponse({}, rheaders)

    if rcode == 503:
        raise error.ServiceUnavailableError(
            "The server is overloaded or not ready yet.",
            http_status=rcode,
            headers=rheaders,
        )

    payload: Dict[str, Any] | bytes
    try:
        content_type = rheaders.get("Content-Type", "")
        if isinstance(rbody, bytes):
            payload = rbody
        elif "text/plain" in content_type:
            payload = {"message": rbody}
        else:
            payload = json.loads(rbody)
    except (JSONDecodeError, UnicodeDecodeError) as exc:
        body_text = rbody if isinstance(rbody, str) else rbody.decode()
        raise error.APIError(
            f"Error code: {rcode} -{body_text}",
            http_status=rcode,
            headers=rheaders,
        ) from exc

    response = TogetherResponse(payload, rheaders)

    # Handle streaming errors
    if rcode < 200 or rcode >= 300:
        raise self.handle_error_response(response, rcode, stream_error=stream)
    return response
|
|
750
|
-
|
|
751
|
-
|
|
752
|
-
class AioHTTPSession(AsyncContextManager[aiohttp.ClientSession]):
|
|
753
|
-
def __init__(self) -> None:
    """Start with no session bound and nothing marked for closing."""
    # Set to True by __aenter__ only when it creates the session itself.
    self._should_close_session: bool = False
    # Populated by __aenter__.
    self._session: aiohttp.ClientSession | None = None
|
|
756
|
-
|
|
757
|
-
async def __aenter__(self) -> aiohttp.ClientSession:
    """Return the session from ``together.aiosession``, or open a new one.

    When ``together.aiosession.get()`` yields ``None`` a fresh
    ``aiohttp.ClientSession`` is entered and flagged so that ``__aexit__``
    knows it is responsible for closing it.
    """
    session = together.aiosession.get()
    self._session = session
    if session is None:
        session = await aiohttp.ClientSession().__aenter__()
        self._session = session
        self._should_close_session = True

    return session
|
|
764
|
-
|
|
765
|
-
async def __aexit__(self, exc_type: Any, exc_value: Any, traceback: Any) -> None:
|
|
766
|
-
if self._session is None:
|
|
767
|
-
raise RuntimeError("Session is not initialized")
|
|
768
|
-
|
|
769
|
-
if self._should_close_session:
|
|
770
|
-
await self._session.__aexit__(exc_type, exc_value, traceback)
|