together 1.5.35__py3-none-any.whl → 2.0.0a6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- together/__init__.py +101 -114
- together/_base_client.py +1995 -0
- together/_client.py +1033 -0
- together/_compat.py +219 -0
- together/_constants.py +14 -0
- together/_exceptions.py +108 -0
- together/_files.py +123 -0
- together/_models.py +857 -0
- together/_qs.py +150 -0
- together/_resource.py +43 -0
- together/_response.py +830 -0
- together/_streaming.py +370 -0
- together/_types.py +260 -0
- together/_utils/__init__.py +64 -0
- together/_utils/_compat.py +45 -0
- together/_utils/_datetime_parse.py +136 -0
- together/_utils/_logs.py +25 -0
- together/_utils/_proxy.py +65 -0
- together/_utils/_reflection.py +42 -0
- together/_utils/_resources_proxy.py +24 -0
- together/_utils/_streams.py +12 -0
- together/_utils/_sync.py +58 -0
- together/_utils/_transform.py +457 -0
- together/_utils/_typing.py +156 -0
- together/_utils/_utils.py +421 -0
- together/_version.py +4 -0
- together/lib/.keep +4 -0
- together/lib/__init__.py +23 -0
- together/{cli → lib/cli}/api/endpoints.py +66 -84
- together/{cli/api/evaluation.py → lib/cli/api/evals.py} +152 -43
- together/{cli → lib/cli}/api/files.py +20 -17
- together/{cli/api/finetune.py → lib/cli/api/fine_tuning.py} +116 -172
- together/{cli → lib/cli}/api/models.py +34 -27
- together/lib/cli/api/utils.py +50 -0
- together/{cli → lib/cli}/cli.py +16 -26
- together/{constants.py → lib/constants.py} +11 -24
- together/lib/resources/__init__.py +11 -0
- together/lib/resources/files.py +999 -0
- together/lib/resources/fine_tuning.py +280 -0
- together/lib/resources/models.py +35 -0
- together/lib/types/__init__.py +13 -0
- together/lib/types/error.py +9 -0
- together/lib/types/fine_tuning.py +397 -0
- together/{utils → lib/utils}/__init__.py +6 -14
- together/{utils → lib/utils}/_log.py +11 -16
- together/{utils → lib/utils}/files.py +90 -288
- together/lib/utils/serializer.py +10 -0
- together/{utils → lib/utils}/tools.py +19 -55
- together/resources/__init__.py +225 -39
- together/resources/audio/__init__.py +72 -48
- together/resources/audio/audio.py +198 -0
- together/resources/audio/speech.py +574 -128
- together/resources/audio/transcriptions.py +247 -261
- together/resources/audio/translations.py +221 -241
- together/resources/audio/voices.py +111 -41
- together/resources/batches.py +417 -0
- together/resources/chat/__init__.py +30 -21
- together/resources/chat/chat.py +102 -0
- together/resources/chat/completions.py +1063 -263
- together/resources/code_interpreter/__init__.py +33 -0
- together/resources/code_interpreter/code_interpreter.py +258 -0
- together/resources/code_interpreter/sessions.py +135 -0
- together/resources/completions.py +884 -225
- together/resources/embeddings.py +172 -68
- together/resources/endpoints.py +589 -490
- together/resources/evals.py +452 -0
- together/resources/files.py +397 -129
- together/resources/fine_tuning.py +1033 -0
- together/resources/hardware.py +181 -0
- together/resources/images.py +258 -104
- together/resources/jobs.py +214 -0
- together/resources/models.py +223 -193
- together/resources/rerank.py +190 -92
- together/resources/videos.py +286 -214
- together/types/__init__.py +66 -167
- together/types/audio/__init__.py +10 -0
- together/types/audio/speech_create_params.py +75 -0
- together/types/audio/transcription_create_params.py +54 -0
- together/types/audio/transcription_create_response.py +111 -0
- together/types/audio/translation_create_params.py +40 -0
- together/types/audio/translation_create_response.py +70 -0
- together/types/audio/voice_list_response.py +23 -0
- together/types/audio_speech_stream_chunk.py +16 -0
- together/types/autoscaling.py +13 -0
- together/types/autoscaling_param.py +15 -0
- together/types/batch_create_params.py +24 -0
- together/types/batch_create_response.py +14 -0
- together/types/batch_job.py +45 -0
- together/types/batch_list_response.py +10 -0
- together/types/chat/__init__.py +18 -0
- together/types/chat/chat_completion.py +60 -0
- together/types/chat/chat_completion_chunk.py +61 -0
- together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
- together/types/chat/chat_completion_structured_message_text_param.py +13 -0
- together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
- together/types/chat/chat_completion_usage.py +13 -0
- together/types/chat/chat_completion_warning.py +9 -0
- together/types/chat/completion_create_params.py +329 -0
- together/types/code_interpreter/__init__.py +5 -0
- together/types/code_interpreter/session_list_response.py +31 -0
- together/types/code_interpreter_execute_params.py +45 -0
- together/types/completion.py +42 -0
- together/types/completion_chunk.py +66 -0
- together/types/completion_create_params.py +138 -0
- together/types/dedicated_endpoint.py +44 -0
- together/types/embedding.py +24 -0
- together/types/embedding_create_params.py +31 -0
- together/types/endpoint_create_params.py +43 -0
- together/types/endpoint_list_avzones_response.py +11 -0
- together/types/endpoint_list_params.py +18 -0
- together/types/endpoint_list_response.py +41 -0
- together/types/endpoint_update_params.py +27 -0
- together/types/eval_create_params.py +263 -0
- together/types/eval_create_response.py +16 -0
- together/types/eval_list_params.py +21 -0
- together/types/eval_list_response.py +10 -0
- together/types/eval_status_response.py +100 -0
- together/types/evaluation_job.py +139 -0
- together/types/execute_response.py +108 -0
- together/types/file_delete_response.py +13 -0
- together/types/file_list.py +12 -0
- together/types/file_purpose.py +9 -0
- together/types/file_response.py +31 -0
- together/types/file_type.py +7 -0
- together/types/fine_tuning_cancel_response.py +194 -0
- together/types/fine_tuning_content_params.py +24 -0
- together/types/fine_tuning_delete_params.py +11 -0
- together/types/fine_tuning_delete_response.py +12 -0
- together/types/fine_tuning_list_checkpoints_response.py +21 -0
- together/types/fine_tuning_list_events_response.py +12 -0
- together/types/fine_tuning_list_response.py +199 -0
- together/types/finetune_event.py +41 -0
- together/types/finetune_event_type.py +33 -0
- together/types/finetune_response.py +177 -0
- together/types/hardware_list_params.py +16 -0
- together/types/hardware_list_response.py +58 -0
- together/types/image_data_b64.py +15 -0
- together/types/image_data_url.py +15 -0
- together/types/image_file.py +23 -0
- together/types/image_generate_params.py +85 -0
- together/types/job_list_response.py +47 -0
- together/types/job_retrieve_response.py +43 -0
- together/types/log_probs.py +18 -0
- together/types/model_list_response.py +10 -0
- together/types/model_object.py +42 -0
- together/types/model_upload_params.py +36 -0
- together/types/model_upload_response.py +23 -0
- together/types/rerank_create_params.py +36 -0
- together/types/rerank_create_response.py +36 -0
- together/types/tool_choice.py +23 -0
- together/types/tool_choice_param.py +23 -0
- together/types/tools_param.py +23 -0
- together/types/training_method_dpo.py +22 -0
- together/types/training_method_sft.py +18 -0
- together/types/video_create_params.py +86 -0
- together/types/video_create_response.py +10 -0
- together/types/video_job.py +57 -0
- together-2.0.0a6.dist-info/METADATA +729 -0
- together-2.0.0a6.dist-info/RECORD +165 -0
- {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/WHEEL +1 -1
- together-2.0.0a6.dist-info/entry_points.txt +2 -0
- {together-1.5.35.dist-info → together-2.0.0a6.dist-info}/licenses/LICENSE +1 -1
- together/abstract/api_requestor.py +0 -770
- together/cli/api/chat.py +0 -298
- together/cli/api/completions.py +0 -119
- together/cli/api/images.py +0 -93
- together/cli/api/utils.py +0 -139
- together/client.py +0 -186
- together/error.py +0 -194
- together/filemanager.py +0 -635
- together/legacy/__init__.py +0 -0
- together/legacy/base.py +0 -27
- together/legacy/complete.py +0 -93
- together/legacy/embeddings.py +0 -27
- together/legacy/files.py +0 -146
- together/legacy/finetune.py +0 -177
- together/legacy/images.py +0 -27
- together/legacy/models.py +0 -44
- together/resources/batch.py +0 -165
- together/resources/code_interpreter.py +0 -82
- together/resources/evaluation.py +0 -808
- together/resources/finetune.py +0 -1388
- together/together_response.py +0 -50
- together/types/abstract.py +0 -26
- together/types/audio_speech.py +0 -311
- together/types/batch.py +0 -54
- together/types/chat_completions.py +0 -210
- together/types/code_interpreter.py +0 -57
- together/types/common.py +0 -67
- together/types/completions.py +0 -107
- together/types/embeddings.py +0 -35
- together/types/endpoints.py +0 -123
- together/types/error.py +0 -16
- together/types/evaluation.py +0 -93
- together/types/files.py +0 -93
- together/types/finetune.py +0 -465
- together/types/images.py +0 -42
- together/types/models.py +0 -96
- together/types/rerank.py +0 -43
- together/types/videos.py +0 -69
- together/utils/api_helpers.py +0 -124
- together/version.py +0 -6
- together-1.5.35.dist-info/METADATA +0 -583
- together-1.5.35.dist-info/RECORD +0 -77
- together-1.5.35.dist-info/entry_points.txt +0 -3
- /together/{abstract → lib/cli}/__init__.py +0 -0
- /together/{cli → lib/cli/api}/__init__.py +0 -0
- /together/{cli/api/__init__.py → py.typed} +0 -0
together/filemanager.py
DELETED
|
@@ -1,635 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import math
|
|
4
|
-
import os
|
|
5
|
-
import shutil
|
|
6
|
-
import stat
|
|
7
|
-
import tempfile
|
|
8
|
-
import uuid
|
|
9
|
-
from concurrent.futures import Future, ThreadPoolExecutor, as_completed
|
|
10
|
-
from functools import partial
|
|
11
|
-
from pathlib import Path
|
|
12
|
-
from typing import Any, BinaryIO, Dict, List, Tuple
|
|
13
|
-
|
|
14
|
-
import requests
|
|
15
|
-
from filelock import FileLock
|
|
16
|
-
from requests.structures import CaseInsensitiveDict
|
|
17
|
-
from tqdm import tqdm
|
|
18
|
-
|
|
19
|
-
from together.abstract import api_requestor
|
|
20
|
-
from together.constants import (
|
|
21
|
-
DISABLE_TQDM,
|
|
22
|
-
DOWNLOAD_BLOCK_SIZE,
|
|
23
|
-
MAX_CONCURRENT_PARTS,
|
|
24
|
-
MAX_FILE_SIZE_GB,
|
|
25
|
-
MAX_RETRIES,
|
|
26
|
-
MIN_PART_SIZE_MB,
|
|
27
|
-
NUM_BYTES_IN_GB,
|
|
28
|
-
TARGET_PART_SIZE_MB,
|
|
29
|
-
MAX_MULTIPART_PARTS,
|
|
30
|
-
MULTIPART_UPLOAD_TIMEOUT,
|
|
31
|
-
)
|
|
32
|
-
from together.error import (
|
|
33
|
-
APIError,
|
|
34
|
-
AuthenticationError,
|
|
35
|
-
DownloadError,
|
|
36
|
-
FileTypeError,
|
|
37
|
-
ResponseError,
|
|
38
|
-
)
|
|
39
|
-
from together.together_response import TogetherResponse
|
|
40
|
-
from together.types import (
|
|
41
|
-
FilePurpose,
|
|
42
|
-
FileResponse,
|
|
43
|
-
FileType,
|
|
44
|
-
TogetherClient,
|
|
45
|
-
TogetherRequest,
|
|
46
|
-
)
|
|
47
|
-
from tqdm.utils import CallbackIOWrapper
|
|
48
|
-
import together.utils
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
def chmod_and_replace(src: Path, dst: Path) -> None:
    """Give ``src`` the default file mode of ``dst``'s directory, then move it to ``dst``.

    The mode is discovered by creating a throwaway probe file next to ``dst``
    and reading back the permission bits it received, so the process ``umask``
    is never queried directly (there is no convenient thread-safe way to
    read it).
    """
    probe = dst.parent / f"tmp_{uuid.uuid4()}"

    try:
        probe.touch()
        probe_mode = probe.stat().st_mode
        permission_bits = stat.S_IMODE(probe_mode)
        os.chmod(src.as_posix(), permission_bits)
    finally:
        # The probe only exists to sample a mode; discard it in every case.
        probe.unlink()

    shutil.move(src.as_posix(), dst.as_posix())
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
def _get_file_size(
|
|
75
|
-
headers: CaseInsensitiveDict[str],
|
|
76
|
-
) -> int:
|
|
77
|
-
"""
|
|
78
|
-
Extracts file size from header
|
|
79
|
-
"""
|
|
80
|
-
total_size_in_bytes = 0
|
|
81
|
-
|
|
82
|
-
parts = headers.get("Content-Range", "").split(" ")
|
|
83
|
-
|
|
84
|
-
if len(parts) == 2:
|
|
85
|
-
range_parts = parts[1].split("/")
|
|
86
|
-
|
|
87
|
-
if len(range_parts) == 2:
|
|
88
|
-
total_size_in_bytes = int(range_parts[1])
|
|
89
|
-
|
|
90
|
-
return total_size_in_bytes
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
def _prepare_output(
|
|
94
|
-
headers: CaseInsensitiveDict[str],
|
|
95
|
-
step: int = -1,
|
|
96
|
-
output: Path | None = None,
|
|
97
|
-
remote_name: str | None = None,
|
|
98
|
-
) -> Path:
|
|
99
|
-
"""
|
|
100
|
-
Generates output file name from remote name and headers
|
|
101
|
-
"""
|
|
102
|
-
if output:
|
|
103
|
-
return output
|
|
104
|
-
|
|
105
|
-
content_type = str(headers.get("content-type"))
|
|
106
|
-
|
|
107
|
-
assert remote_name, (
|
|
108
|
-
"No model name found in fine_tune object. "
|
|
109
|
-
"Please specify an `output` file name."
|
|
110
|
-
)
|
|
111
|
-
|
|
112
|
-
if step > 0:
|
|
113
|
-
remote_name += f"-checkpoint-{step}"
|
|
114
|
-
|
|
115
|
-
if "x-tar" in content_type.lower():
|
|
116
|
-
remote_name += ".tar.gz"
|
|
117
|
-
|
|
118
|
-
else:
|
|
119
|
-
remote_name += ".tar.zst"
|
|
120
|
-
|
|
121
|
-
return Path(remote_name)
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
class DownloadManager:
    """Downloads remote files to local disk through the API requestor."""

    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    def get_file_metadata(
        self,
        url: str,
        output: Path | None = None,
        remote_name: str | None = None,
        fetch_metadata: bool = False,
    ) -> Tuple[Path, int]:
        """Resolve the local output path and, optionally, the remote file size.

        Args:
            url: Endpoint to query for the file.
            output: Explicit output path; used as-is when given.
            remote_name: Name used to build the output path when ``output``
                is not given.
            fetch_metadata: When false, no request is made and the size is
                reported as ``0``.

        Returns:
            ``(file_path, file_size)``. ``file_size`` is ``0`` when it was not
            fetched or could not be parsed from the response headers.

        Raises:
            APIError: If the metadata request returns an HTTP error status.
        """
        if not fetch_metadata:
            if isinstance(output, Path):
                file_path = output
            else:
                assert isinstance(remote_name, str)
                file_path = Path(remote_name)

            return file_path, 0

        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        # Ask for the first two bytes only: the response headers are all that
        # is needed here (Content-Range carries the total size).
        response = requestor.request_raw(
            options=TogetherRequest(
                method="GET",
                url=url,
                headers={"Range": "bytes=0-1"},
            ),
            remaining_retries=MAX_RETRIES,
            stream=False,
        )

        try:
            response.raise_for_status()
        except requests.exceptions.HTTPError as e:
            raise APIError(
                "Error fetching file metadata", http_status=response.status_code
            ) from e

        headers = response.headers

        assert isinstance(headers, CaseInsensitiveDict)

        file_path = _prepare_output(
            headers=headers,
            output=output,
            remote_name=remote_name,
        )

        file_size = _get_file_size(headers)

        return file_path, file_size

    def download(
        self,
        url: str,
        output: Path | None = None,
        remote_name: str | None = None,
        fetch_metadata: bool = False,
    ) -> Tuple[str, int]:
        """Stream ``url`` into a temporary file and move it to its final path.

        The download is written to a temporary file in the destination
        directory while holding a ``<file>.lock`` file lock, then moved into
        place with ``chmod_and_replace``.

        Args:
            url: Endpoint to download from.
            output: Explicit output path.
            remote_name: Name used to build the output path when ``output``
                is not given.
            fetch_metadata: When true, the path and expected size come from a
                preliminary metadata request; otherwise the expected size is
                read from the download response's ``content-length`` header.

        Returns:
            ``(resolved_output_path, expected_file_size)``.

        Raises:
            APIError: If the download request returns an HTTP error status.
            DownloadError: If the expected size is known and the number of
                bytes written to disk differs from it.
        """
        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        # pre-fetch remote file name and file size
        file_path, file_size = self.get_file_metadata(
            url, output, remote_name, fetch_metadata
        )

        temp_file_manager = partial(
            tempfile.NamedTemporaryFile, mode="wb", dir=file_path.parent, delete=False
        )

        # Prevent parallel downloads of the same file with a lock.
        lock_path = Path(file_path.as_posix() + ".lock")

        with FileLock(lock_path.as_posix()):
            temp_path: Path | None = None

            try:
                with temp_file_manager() as temp_file:
                    temp_path = Path(temp_file.name)

                    response = requestor.request_raw(
                        options=TogetherRequest(
                            method="GET",
                            url=url,
                        ),
                        remaining_retries=MAX_RETRIES,
                        stream=True,
                        request_timeout=3600,
                    )

                    try:
                        response.raise_for_status()
                    except Exception as e:
                        raise APIError(
                            "Error downloading file", http_status=response.status_code
                        ) from e

                    if not fetch_metadata:
                        file_size = int(response.headers.get("content-length", 0))

                    with tqdm(
                        total=file_size,
                        unit="B",
                        unit_scale=True,
                        desc=f"Downloading file {file_path.name}",
                        disable=bool(DISABLE_TQDM),
                    ) as pbar:
                        for chunk in response.iter_content(DOWNLOAD_BLOCK_SIZE):
                            pbar.update(len(chunk))
                            temp_file.write(chunk)

                # The temp file is closed (and therefore flushed) here, so the
                # size on disk is final. A size of 0 means the server did not
                # report one, in which case there is nothing to compare with.
                # NOTE(review): assumes the body is not content-encoded, i.e.
                # bytes written equal the advertised length -- confirm.
                downloaded_size = temp_path.stat().st_size
                if file_size and downloaded_size != file_size:
                    raise DownloadError(
                        f"Downloaded file size `{downloaded_size}` bytes does not match "
                        f"remote file size `{file_size}` bytes."
                    )

                # Moves temp file to output file path
                chmod_and_replace(temp_path, file_path)
            except BaseException:
                # Do not leave a partial download behind on any failure.
                if temp_path is not None and temp_path.exists():
                    temp_path.unlink()
                raise
            finally:
                os.remove(lock_path)

        return str(file_path.resolve()), file_size
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
class UploadManager:
    """Uploads a local file in a single request, directly or via a redirect URL."""

    def __init__(self, client: TogetherClient) -> None:
        self._client = client

    @classmethod
    def _redirect_error_handler(
        cls, requestor: api_requestor.APIRequestor, response: requests.Response
    ) -> None:
        """Validate the response of the upload-URL request.

        Only a ``302`` status is accepted.

        Raises:
            AuthenticationError: If the status code is ``401``.
            APIError: For any other status code that is not ``302``.
        """
        if response.status_code == 401:
            raise AuthenticationError(
                "This job would exceed your free trial credits. "
                "Please upgrade to a paid account through "
                "Settings -> Billing on api.together.ai to continue.",
            )
        elif response.status_code != 302:
            raise APIError(
                f"Unexpected error raised by endpoint: {response.content.decode()}, headers: {response.headers}",
                http_status=response.status_code,
            )

    def get_upload_url(
        self,
        url: str,
        file: Path,
        purpose: FilePurpose,
        filetype: FileType,
    ) -> Tuple[str, str]:
        """Request an upload location for ``file``.

        Sends a POST with redirects disabled and reads the target from the
        response headers.

        Returns:
            ``(redirect_url, file_id)`` taken from the ``Location`` and
            ``X-Together-File-Id`` response headers.

        Raises:
            AuthenticationError, APIError: See ``_redirect_error_handler``.
        """
        data = {
            "purpose": purpose.value,
            "file_name": file.name,
            "file_type": filetype.value,
        }

        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        method = "POST"

        headers = together.utils.get_headers(method, requestor.api_key)

        response = requestor.request_raw(
            options=TogetherRequest(
                method=method,
                url=url,
                params=data,
                allow_redirects=False,
                override_headers=True,
                headers=headers,
            ),
            remaining_retries=MAX_RETRIES,
        )

        self._redirect_error_handler(requestor, response)

        redirect_url = response.headers["Location"]
        file_id = response.headers["X-Together-File-Id"]

        return redirect_url, file_id

    def callback(self, url: str) -> TogetherResponse:
        """POST to ``url`` and return the parsed response."""
        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        response, _, _ = requestor.request(
            options=TogetherRequest(
                method="POST",
                url=url,
            ),
        )

        return response

    def upload(
        self,
        url: str,
        file: Path,
        purpose: FilePurpose,
        redirect: bool = False,
    ) -> FileResponse:
        """Upload ``file`` and return the resulting file record.

        Args:
            url: Upload endpoint.
            file: Local file to send.
            purpose: Purpose recorded for the file.
            redirect: When true, first obtain an upload URL, PUT the file
                there, then POST to ``<url>/<file_id>/preprocess``. When
                false, PUT the file straight to ``url``.

        Raises:
            FileTypeError: In redirect mode, if the file extension is not one
                of ``.jsonl``, ``.parquet`` or ``.csv``.
            APIError: In redirect mode, if the PUT does not return status 200.
        """
        file_id = None

        requestor = api_requestor.APIRequestor(
            client=self._client,
        )

        redirect_url = None
        if redirect:
            # Built per call so the class body has no import-time dependency
            # on the enum members.
            filetype_by_suffix = {
                ".jsonl": FileType.jsonl,
                ".parquet": FileType.parquet,
                ".csv": FileType.csv,
            }
            try:
                filetype = filetype_by_suffix[file.suffix]
            except KeyError:
                # The message lists every accepted extension, including .csv.
                raise FileTypeError(
                    f"Unknown extension of file {file}. "
                    "Only files with extensions .jsonl, .parquet and .csv are supported."
                ) from None
            redirect_url, file_id = self.get_upload_url(url, file, purpose, filetype)

        file_size = os.stat(file).st_size

        with tqdm(
            total=file_size,
            unit="B",
            unit_scale=True,
            desc=f"Uploading file {file.name}",
            disable=bool(DISABLE_TQDM),
        ) as pbar:
            with file.open("rb") as f:
                # Advance the progress bar on every read of the file object.
                wrapped_file = CallbackIOWrapper(pbar.update, f, "read")

                if redirect:
                    callback_response = requestor.request_raw(
                        options=TogetherRequest(
                            method="PUT",
                            url=redirect_url,
                            params=wrapped_file,
                            override_headers=True,
                        ),
                        absolute=True,
                        remaining_retries=MAX_RETRIES,
                    )
                else:
                    response, _, _ = requestor.request(
                        options=TogetherRequest(
                            method="PUT",
                            url=url,
                            params=wrapped_file,
                        ),
                    )

        if redirect:
            assert isinstance(callback_response, requests.Response)

            if not callback_response.status_code == 200:
                raise APIError(
                    f"Error during file upload: {callback_response.content.decode()}, headers: {callback_response.headers}",
                    http_status=callback_response.status_code,
                )

            response = self.callback(f"{url}/{file_id}/preprocess")

        assert isinstance(response, TogetherResponse)

        return FileResponse(**response.data)
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
class MultipartUploadManager:
    """Handles multipart uploads for large files"""

    def __init__(self, client: TogetherClient) -> None:
        self._client = client
        self.max_concurrent_parts = MAX_CONCURRENT_PARTS

    def upload(
        self,
        url: str,
        file: Path,
        purpose: FilePurpose,
    ) -> FileResponse:
        """Upload large file using multipart upload.

        Initiates the upload, sends the parts concurrently, then completes
        it. If anything fails after initiation, the upload is aborted on the
        backend before the error is re-raised.

        Raises:
            FileTypeError: If the file is larger than ``MAX_FILE_SIZE_GB``.
            ValueError: If the file extension is not supported.
        """
        file_size = os.stat(file).st_size

        file_size_gb = file_size / NUM_BYTES_IN_GB
        if file_size_gb > MAX_FILE_SIZE_GB:
            raise FileTypeError(
                f"File size {file_size_gb:.1f}GB exceeds maximum supported size of {MAX_FILE_SIZE_GB}GB"
            )

        part_size, num_parts = self._calculate_parts(file_size)

        file_type = self._get_file_type(file)
        upload_info = None

        try:
            upload_info = self._initiate_upload(
                url, file, file_size, num_parts, purpose, file_type
            )

            completed_parts = self._upload_parts_concurrent(
                file, upload_info, part_size
            )

            return self._complete_upload(
                url, upload_info["upload_id"], upload_info["file_id"], completed_parts
            )

        except Exception:
            # Cleanup on failure: only abort if the upload was ever initiated.
            if upload_info is not None:
                self._abort_upload(
                    url, upload_info["upload_id"], upload_info["file_id"]
                )
            raise

    def _get_file_type(self, file: Path) -> str:
        """Get file type from extension, raising ValueError for unsupported extensions"""
        if file.suffix == ".jsonl":
            return "jsonl"
        elif file.suffix == ".parquet":
            return "parquet"
        elif file.suffix == ".csv":
            return "csv"
        else:
            raise ValueError(
                f"Unsupported file extension: '{file.suffix}'. "
                f"Supported extensions: .jsonl, .parquet, .csv"
            )

    def _calculate_parts(self, file_size: int) -> tuple[int, int]:
        """Calculate part size and part count for ``file_size`` bytes.

        Returns:
            ``(part_size, num_parts)``. A file no larger than the target part
            size is sent as a single part.
        """
        min_part_size = MIN_PART_SIZE_MB * 1024 * 1024  # bytes
        target_part_size = TARGET_PART_SIZE_MB * 1024 * 1024  # bytes

        if file_size <= target_part_size:
            return file_size, 1

        # Aim for target-sized parts, but never exceed the part-count cap.
        num_parts = min(MAX_MULTIPART_PARTS, math.ceil(file_size / target_part_size))
        part_size = math.ceil(file_size / num_parts)

        if part_size < min_part_size:
            part_size = min_part_size
            num_parts = math.ceil(file_size / part_size)

        return part_size, num_parts

    def _initiate_upload(
        self,
        url: str,
        file: Path,
        file_size: int,
        num_parts: int,
        purpose: FilePurpose,
        file_type: str,
    ) -> Any:
        """Initiate multipart upload with backend and return the response data.

        ``url`` is accepted but not used; the request always goes to
        ``files/multipart/initiate``.
        """
        requestor = api_requestor.APIRequestor(client=self._client)

        payload = {
            "file_name": file.name,
            "file_size": file_size,
            "num_parts": num_parts,
            "purpose": purpose.value,
            "file_type": file_type,
        }

        response, _, _ = requestor.request(
            options=TogetherRequest(
                method="POST",
                url="files/multipart/initiate",
                params=payload,
            ),
        )

        return response.data

    def _submit_part(
        self,
        executor: ThreadPoolExecutor,
        f: BinaryIO,
        part_info: Dict[str, Any],
        part_size: int,
    ) -> Future[str]:
        """Submit a single part for upload and return the future.

        The part's bytes are read from ``f`` here, in the calling thread,
        before the upload is handed to the executor.
        """
        # PartNumber is 1-based; convert it to a byte offset in the file.
        f.seek((part_info["PartNumber"] - 1) * part_size)
        part_data = f.read(part_size)
        return executor.submit(self._upload_single_part, part_info, part_data)

    def _upload_parts_concurrent(
        self, file: Path, upload_info: Dict[str, Any], part_size: int
    ) -> List[Dict[str, Any]]:
        """Upload file parts concurrently with progress tracking.

        At most ``self.max_concurrent_parts`` parts are in flight at once;
        a new part is submitted each time one completes.

        Returns:
            ``{"part_number", "etag"}`` dicts sorted by part number.

        Raises:
            Exception: If any part fails; the original error is chained.
        """
        parts = upload_info["parts"]
        completed_parts = []

        with ThreadPoolExecutor(max_workers=self.max_concurrent_parts) as executor:
            with tqdm(
                total=len(parts),
                desc="Uploading parts",
                unit="part",
                # Honor the same switch as the other progress bars in this file.
                disable=bool(DISABLE_TQDM),
            ) as pbar:
                with open(file, "rb") as f:
                    future_to_part = {}
                    part_index = 0

                    # Submit initial batch limited by max_concurrent_parts
                    for _ in range(min(self.max_concurrent_parts, len(parts))):
                        part_info = parts[part_index]
                        future = self._submit_part(executor, f, part_info, part_size)
                        future_to_part[future] = part_info["PartNumber"]
                        part_index += 1

                    # Process completions and submit new parts (sliding window)
                    while future_to_part:
                        done_future = next(as_completed(future_to_part))
                        part_number = future_to_part.pop(done_future)

                        try:
                            etag = done_future.result()
                            completed_parts.append(
                                {"part_number": part_number, "etag": etag}
                            )
                            pbar.update(1)
                        except Exception as e:
                            raise Exception(
                                f"Failed to upload part {part_number}: {e}"
                            ) from e

                        # Submit next part if available
                        if part_index < len(parts):
                            part_info = parts[part_index]
                            future = self._submit_part(
                                executor, f, part_info, part_size
                            )
                            future_to_part[future] = part_info["PartNumber"]
                            part_index += 1

        completed_parts.sort(key=lambda x: x["part_number"])
        return completed_parts

    def _upload_single_part(self, part_info: Dict[str, Any], part_data: bytes) -> str:
        """Upload a single part and return its ETag with surrounding quotes removed.

        Raises:
            requests.HTTPError: If the PUT returns an error status.
            ResponseError: If the response carries no ``ETag`` header.
        """
        response = requests.put(
            part_info["URL"],
            data=part_data,
            headers=part_info.get("Headers", {}),
            timeout=MULTIPART_UPLOAD_TIMEOUT,
        )
        response.raise_for_status()

        etag = response.headers.get("ETag", "").strip('"')
        if not etag:
            raise ResponseError(f"No ETag returned for part {part_info['PartNumber']}")

        return etag

    def _complete_upload(
        self,
        url: str,
        upload_id: str,
        file_id: str,
        completed_parts: List[Dict[str, Any]],
    ) -> FileResponse:
        """Complete the multipart upload and return the file record.

        ``url`` is accepted but not used; the request always goes to
        ``files/multipart/complete``.
        """
        requestor = api_requestor.APIRequestor(client=self._client)

        payload = {
            "upload_id": upload_id,
            "file_id": file_id,
            "parts": completed_parts,
        }

        response, _, _ = requestor.request(
            options=TogetherRequest(
                method="POST",
                url="files/multipart/complete",
                params=payload,
            ),
        )

        # Use the nested "file" object when present, else the whole payload.
        return FileResponse(**response.data.get("file", response.data))

    def _abort_upload(self, url: str, upload_id: str, file_id: str) -> None:
        """Abort the multipart upload.

        ``url`` is accepted but not used; the request always goes to
        ``files/multipart/abort``.
        """
        requestor = api_requestor.APIRequestor(client=self._client)

        payload = {
            "upload_id": upload_id,
            "file_id": file_id,
        }

        requestor.request(
            options=TogetherRequest(
                method="POST",
                url="files/multipart/abort",
                params=payload,
            ),
        )
|
together/legacy/__init__.py
DELETED
|
File without changes
|
together/legacy/base.py
DELETED
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
import functools
|
|
2
|
-
import warnings
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
# Deprecation notice for configuring the key through ``together.api_key``.
API_KEY_WARNING = (
    "The use of together.api_key is deprecated and will be removed in the next major release. "
    + "Please set the TOGETHER_API_KEY environment variable instead."
)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
def deprecated(func):  # type: ignore
    """
    Decorator that marks a function as deprecated.

    Every call to the wrapped function emits a ``DeprecationWarning``
    naming the function, then forwards the call and returns its result.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):  # type: ignore
        message = f"Call to deprecated function {func.__name__}."
        # stacklevel=2 attributes the warning to the caller of the wrapper.
        warnings.warn(message, category=DeprecationWarning, stacklevel=2)
        return func(*args, **kwargs)

    return wrapper
|