together 1.5.4__tar.gz → 1.5.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {together-1.5.4 → together-1.5.6}/PKG-INFO +26 -2
- {together-1.5.4 → together-1.5.6}/README.md +24 -0
- {together-1.5.4 → together-1.5.6}/pyproject.toml +4 -4
- {together-1.5.4 → together-1.5.6}/src/together/filemanager.py +2 -1
- {together-1.5.4 → together-1.5.6}/src/together/resources/code_interpreter.py +28 -4
- {together-1.5.4 → together-1.5.6}/src/together/resources/finetune.py +43 -33
- {together-1.5.4 → together-1.5.6}/src/together/types/__init__.py +8 -8
- {together-1.5.4 → together-1.5.6}/src/together/types/code_interpreter.py +11 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/finetune.py +16 -10
- {together-1.5.4 → together-1.5.6}/LICENSE +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/abstract/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/abstract/api_requestor.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/chat.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/completions.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/endpoints.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/files.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/finetune.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/images.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/models.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/api/utils.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/cli/cli.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/client.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/constants.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/error.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/base.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/complete.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/embeddings.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/files.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/finetune.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/images.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/legacy/models.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/audio/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/audio/speech.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/chat/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/chat/completions.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/completions.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/embeddings.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/endpoints.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/files.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/images.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/models.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/resources/rerank.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/together_response.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/abstract.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/audio_speech.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/chat_completions.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/common.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/completions.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/embeddings.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/endpoints.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/error.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/files.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/images.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/models.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/types/rerank.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/utils/__init__.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/utils/_log.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/utils/api_helpers.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/utils/files.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/utils/tools.py +0 -0
- {together-1.5.4 → together-1.5.6}/src/together/version.py +0 -0
{together-1.5.4 → together-1.5.6}/PKG-INFO

@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.5.4
+Version: 1.5.6
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
@@ -23,7 +23,7 @@ Requires-Dist: pillow (>=11.1.0,<12.0.0)
 Requires-Dist: pyarrow (>=10.0.1)
 Requires-Dist: pydantic (>=2.6.3,<3.0.0)
 Requires-Dist: requests (>=2.31.0,<3.0.0)
-Requires-Dist: rich (>=13.8.1,<
+Requires-Dist: rich (>=13.8.1,<15.0.0)
 Requires-Dist: tabulate (>=0.9.0,<0.10.0)
 Requires-Dist: tqdm (>=4.66.2,<5.0.0)
 Requires-Dist: typer (>=0.9,<0.16)
@@ -220,6 +220,30 @@ async def async_chat_completion(messages):
 asyncio.run(async_chat_completion(messages))
 ```
 
+#### Fetching logprobs
+
+Logprobs are logarithms of token-level generation probabilities that indicate the likelihood of the generated token based on the previous tokens in the context. Logprobs allow us to estimate the model's confidence in its outputs, which can be used to decide how to optimally consume the model's output (e.g. rejecting low confidence outputs, retrying or ensembling model outputs etc).
+
+```python
+from together import Together
+
+client = Together()
+
+response = client.chat.completions.create(
+    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    messages=[{"role": "user", "content": "tell me about new york"}],
+    logprobs=1
+)
+
+response_lobprobs = response.choices[0].logprobs
+
+print(dict(zip(response_lobprobs.tokens, response_lobprobs.token_logprobs)))
+# {'New': -2.384e-07, ' York': 0.0, ',': 0.0, ' also': -0.20703125, ' known': -0.20214844, ' as': -8.34465e-07, ... }
+```
+
+More details about using logprobs in Together's API can be found [here](https://docs.together.ai/docs/logprobs).
+
+
 ### Completions
 
 Completions are for code and language models shown [here](https://docs.together.ai/docs/inference-models). Below, a code model example is shown.
{together-1.5.4 → together-1.5.6}/README.md

@@ -186,6 +186,30 @@ async def async_chat_completion(messages):
 asyncio.run(async_chat_completion(messages))
 ```
 
+#### Fetching logprobs
+
+Logprobs are logarithms of token-level generation probabilities that indicate the likelihood of the generated token based on the previous tokens in the context. Logprobs allow us to estimate the model's confidence in its outputs, which can be used to decide how to optimally consume the model's output (e.g. rejecting low confidence outputs, retrying or ensembling model outputs etc).
+
+```python
+from together import Together
+
+client = Together()
+
+response = client.chat.completions.create(
+    model="mistralai/Mixtral-8x7B-Instruct-v0.1",
+    messages=[{"role": "user", "content": "tell me about new york"}],
+    logprobs=1
+)
+
+response_lobprobs = response.choices[0].logprobs
+
+print(dict(zip(response_lobprobs.tokens, response_lobprobs.token_logprobs)))
+# {'New': -2.384e-07, ' York': 0.0, ',': 0.0, ' also': -0.20703125, ' known': -0.20214844, ' as': -8.34465e-07, ... }
+```
+
+More details about using logprobs in Together's API can be found [here](https://docs.together.ai/docs/logprobs).
+
+
 ### Completions
 
 Completions are for code and language models shown [here](https://docs.together.ai/docs/inference-models). Below, a code model example is shown.
{together-1.5.4 → together-1.5.6}/pyproject.toml

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.5.4"
+version = "1.5.6"
 authors = ["Together AI <support@together.ai>"]
 description = "Python client for Together's Cloud Platform!"
 readme = "README.md"
@@ -29,7 +29,7 @@ homepage = "https://github.com/togethercomputer/together-python"
 python = "^3.10"
 typer = ">=0.9,<0.16"
 requests = "^2.31.0"
-rich = "
+rich = ">=13.8.1,<15.0.0"
 tqdm = "^4.66.2"
 tabulate = "^0.9.0"
 pydantic = "^2.6.3"
@@ -49,10 +49,10 @@ optional = true
 
 [tool.poetry.group.quality.dependencies]
 black = ">=23.1,<26.0"
-ruff = ">=0.3.2,<0.
+ruff = ">=0.3.2,<0.12.0"
 types-tqdm = "^4.65.0.0"
 types-tabulate = "^0.9.0.3"
-pre-commit = "4.
+pre-commit = "4.2.0"
 types-requests = "^2.31.0.20240218"
 pyarrow-stubs = ">=10.0.1.7,<20240831.0.0.0"
 mypy = "^1.9.0"
{together-1.5.4 → together-1.5.6}/src/together/filemanager.py

@@ -378,7 +378,8 @@ class UploadManager:
 
         if not callback_response.status_code == 200:
             raise APIError(
-                f"Error
+                f"Error during file upload: {callback_response.content.decode()}, headers: {callback_response.headers}",
+                http_status=callback_response.status_code,
             )
 
         response = self.callback(f"{url}/{file_id}/preprocess")
{together-1.5.4 → together-1.5.6}/src/together/resources/code_interpreter.py

@@ -1,11 +1,12 @@
 from __future__ import annotations
 
-from typing import Dict, Literal, Optional
+from typing import Any, Dict, List, Literal, Optional
+from pydantic import ValidationError
 
 from together.abstract import api_requestor
 from together.together_response import TogetherResponse
 from together.types import TogetherClient, TogetherRequest
-from together.types.code_interpreter import ExecuteResponse
+from together.types.code_interpreter import ExecuteResponse, FileInput
 
 
 class CodeInterpreter:
@@ -19,22 +20,28 @@ class CodeInterpreter:
         code: str,
         language: Literal["python"],
         session_id: Optional[str] = None,
+        files: Optional[List[Dict[str, Any]]] = None,
     ) -> ExecuteResponse:
-        """Execute a code snippet.
+        """Execute a code snippet, optionally with files.
 
         Args:
             code (str): Code snippet to execute
             language (str): Programming language for the code to execute. Currently only supports Python.
             session_id (str, optional): Identifier of the current session. Used to make follow-up calls.
+            files (List[Dict], optional): Files to upload to the session before executing the code.
 
         Returns:
             ExecuteResponse: Object containing execution results and outputs
+
+        Raises:
+            ValidationError: If any dictionary in the `files` list does not conform to the
+                required structure or types.
         """
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
 
-        data: Dict[str,
+        data: Dict[str, Any] = {
             "code": code,
             "language": language,
         }
@@ -42,6 +49,23 @@ class CodeInterpreter:
         if session_id is not None:
             data["session_id"] = session_id
 
+        if files is not None:
+            serialized_files = []
+            try:
+                for file_dict in files:
+                    # Validate the dictionary by creating a FileInput instance
+                    validated_file = FileInput(**file_dict)
+                    # Serialize the validated model back to a dict for the API call
+                    serialized_files.append(validated_file.model_dump())
+            except ValidationError as e:
+                raise ValueError(f"Invalid file input format: {e}") from e
+            except TypeError as e:
+                raise ValueError(
+                    f"Invalid file input: Each item in 'files' must be a dictionary. Error: {e}"
+                ) from e
+
+            data["files"] = serialized_files
+
         # Use absolute URL to bypass the /v1 prefix
         response, _, _ = requestor.request(
             options=TogetherRequest(
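Taken together with the `FileInput` model added in `types/code_interpreter.py` further down, the new `files` argument accepts plain dictionaries with `name`, `encoding` ("string" or "base64"), and `content` keys, validated before the request is sent. A minimal usage sketch (the `client.code_interpreter` accessor and the method name `run` are assumptions here, since the enclosing method definition is not part of this diff):

```python
from together import Together

client = Together()

# Hypothetical call using the new `files` parameter; each dict must match the
# FileInput schema (name, encoding, content) or a ValueError is raised.
result = client.code_interpreter.run(  # accessor/method name assumed, not shown in the diff
    code="print(open('data.txt').read())",
    language="python",
    files=[
        {"name": "data.txt", "encoding": "string", "content": "hello from an uploaded file"},
    ],
)
print(result)
```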
{together-1.5.4 → together-1.5.6}/src/together/resources/finetune.py

@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import re
 from pathlib import Path
-from typing import
+from typing import List, Literal
 
 from rich import print as rprint
 
@@ -10,37 +10,38 @@ from together.abstract import api_requestor
 from together.filemanager import DownloadManager
 from together.together_response import TogetherResponse
 from together.types import (
+    CosineLRScheduler,
+    CosineLRSchedulerArgs,
+    FinetuneCheckpoint,
     FinetuneDownloadResult,
     FinetuneList,
     FinetuneListEvents,
+    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
     FinetuneTrainingLimits,
     FullTrainingType,
+    LinearLRScheduler,
+    LinearLRSchedulerArgs,
     LoRATrainingType,
     TogetherClient,
     TogetherRequest,
-    TrainingType,
-    FinetuneLRScheduler,
-    FinetuneLinearLRScheduler,
-    FinetuneCosineLRScheduler,
-    FinetuneLinearLRSchedulerArgs,
-    FinetuneCosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
-
+    TrainingType,
 )
 from together.types.finetune import (
     DownloadCheckpointType,
-    FinetuneEventType,
     FinetuneEvent,
+    FinetuneEventType,
 )
 from together.utils import (
+    get_event_step,
     log_warn_once,
     normalize_key,
-    get_event_step,
 )
 
+
 _FT_JOB_WITH_STEP_REGEX = r"^ft-[\dabcdef-]+:\d+$"
 
 
@@ -50,7 +51,7 @@ AVAILABLE_TRAINING_METHODS = {
 }
 
 
-def createFinetuneRequest(
+def create_finetune_request(
     model_limits: FinetuneTrainingLimits,
     training_file: str,
     model: str | None = None,
@@ -63,7 +64,7 @@ def createFinetuneRequest(
     lr_scheduler_type: Literal["linear", "cosine"] = "linear",
     min_lr_ratio: float = 0.0,
     scheduler_num_cycles: float = 0.5,
-    warmup_ratio: float =
+    warmup_ratio: float | None = None,
     max_grad_norm: float = 1.0,
     weight_decay: float = 0.0,
     lora: bool = False,
@@ -81,7 +82,6 @@ def createFinetuneRequest(
     dpo_beta: float | None = None,
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
-
     if model is not None and from_checkpoint is not None:
         raise ValueError(
             "You must specify either a model or a checkpoint to start a job from, not both"
@@ -90,6 +90,8 @@ def createFinetuneRequest(
     if model is None and from_checkpoint is None:
         raise ValueError("You must specify either a model or a checkpoint")
 
+    model_or_checkpoint = model or from_checkpoint
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -103,7 +105,9 @@ def createFinetuneRequest(
     min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
-            raise ValueError(
+            raise ValueError(
+                f"LoRA adapters are not supported for the selected model ({model_or_checkpoint})."
+            )
         lora_r = lora_r if lora_r is not None else model_limits.lora_training.max_rank
         lora_alpha = lora_alpha if lora_alpha is not None else lora_r * 2
         training_type = LoRATrainingType(
@@ -118,7 +122,9 @@ def createFinetuneRequest(
 
     else:
         if model_limits.full_training is None:
-            raise ValueError(
+            raise ValueError(
+                f"Full training is not supported for the selected model ({model_or_checkpoint})."
+            )
 
         max_batch_size = model_limits.full_training.max_batch_size
         min_batch_size = model_limits.full_training.min_batch_size
@@ -127,46 +133,50 @@ def createFinetuneRequest(
 
     if batch_size > max_batch_size:
         raise ValueError(
-            "Requested batch size is higher that the maximum allowed value."
+            f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size}."
         )
 
     if batch_size < min_batch_size:
         raise ValueError(
-            "Requested batch size is lower that the minimum allowed value."
+            f"Requested batch size of {batch_size} is lower that the minimum allowed value of {min_batch_size}."
        )
 
     if warmup_ratio > 1 or warmup_ratio < 0:
-        raise ValueError("Warmup ratio should be between 0 and 1")
+        raise ValueError(f"Warmup ratio should be between 0 and 1 (got {warmup_ratio})")
 
     if min_lr_ratio is not None and (min_lr_ratio > 1 or min_lr_ratio < 0):
-        raise ValueError(
+        raise ValueError(
+            f"Min learning rate ratio should be between 0 and 1 (got {min_lr_ratio})"
+        )
 
     if max_grad_norm < 0:
-        raise ValueError(
+        raise ValueError(
+            f"Max gradient norm should be non-negative (got {max_grad_norm})"
+        )
 
     if weight_decay is not None and (weight_decay < 0):
-        raise ValueError("Weight decay should be non-negative")
+        raise ValueError(f"Weight decay should be non-negative (got {weight_decay})")
 
     if training_method not in AVAILABLE_TRAINING_METHODS:
         raise ValueError(
             f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
         )
 
-
-    lrScheduler: FinetuneLRScheduler = FinetuneLRScheduler(lr_scheduler_type="linear")
-
+    lr_scheduler: FinetuneLRScheduler
     if lr_scheduler_type == "cosine":
         if scheduler_num_cycles <= 0.0:
-            raise ValueError(
+            raise ValueError(
+                f"Number of cycles should be greater than 0 (got {scheduler_num_cycles})"
+            )
 
-
-        lr_scheduler_args=
+        lr_scheduler = CosineLRScheduler(
+            lr_scheduler_args=CosineLRSchedulerArgs(
                 min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
             ),
         )
     else:
-
-        lr_scheduler_args=
+        lr_scheduler = LinearLRScheduler(
+            lr_scheduler_args=LinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
         )
 
     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
@@ -182,7 +192,7 @@ def createFinetuneRequest(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
-        lr_scheduler=
+        lr_scheduler=lr_scheduler,
         warmup_ratio=warmup_ratio,
         max_grad_norm=max_grad_norm,
         weight_decay=weight_decay,
@@ -374,7 +384,7 @@ class FineTuning:
             pass
         model_limits = self.get_model_limits(model=model_name)
 
-        finetune_request =
+        finetune_request = create_finetune_request(
             model_limits=model_limits,
             training_file=training_file,
             model=model,
@@ -590,7 +600,7 @@ class FineTuning:
                 raise ValueError(
                     "Only DEFAULT checkpoint type is allowed for FullTrainingType"
                 )
-            url += "&checkpoint=
+            url += "&checkpoint=model_output_path"
         elif isinstance(ft_job.training_type, LoRATrainingType):
             if checkpoint_type == DownloadCheckpointType.DEFAULT:
                 checkpoint_type = DownloadCheckpointType.MERGED
@@ -762,7 +772,7 @@ class AsyncFineTuning:
             pass
         model_limits = await self.get_model_limits(model=model_name)
 
-        finetune_request =
+        finetune_request = create_finetune_request(
             model_limits=model_limits,
             training_file=training_file,
             model=model,
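The net effect of the scheduler rework above is that `create_finetune_request` now builds a typed `LinearLRScheduler` or `CosineLRScheduler` instead of a bare `FinetuneLRScheduler`. A condensed sketch of just that branch, using the classes exported from `together.types` in this release (the helper name `build_lr_scheduler` is illustrative, not part of the SDK):

```python
from together.types import (
    CosineLRScheduler,
    CosineLRSchedulerArgs,
    FinetuneLRScheduler,
    LinearLRScheduler,
    LinearLRSchedulerArgs,
)


def build_lr_scheduler(
    lr_scheduler_type: str = "linear",
    min_lr_ratio: float = 0.0,
    scheduler_num_cycles: float = 0.5,
) -> FinetuneLRScheduler:
    # Condensed restatement of the branch in create_finetune_request above.
    if lr_scheduler_type == "cosine":
        if scheduler_num_cycles <= 0.0:
            raise ValueError(
                f"Number of cycles should be greater than 0 (got {scheduler_num_cycles})"
            )
        return CosineLRScheduler(
            lr_scheduler_args=CosineLRSchedulerArgs(
                min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
            )
        )
    return LinearLRScheduler(
        lr_scheduler_args=LinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio)
    )
```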
{together-1.5.4 → together-1.5.6}/src/together/types/__init__.py

@@ -34,11 +34,11 @@ from together.types.finetune import (
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
-
-
+    CosineLRScheduler,
+    CosineLRSchedulerArgs,
     FinetuneDownloadResult,
-
-
+    LinearLRScheduler,
+    LinearLRSchedulerArgs,
     FinetuneLRScheduler,
     FinetuneList,
     FinetuneListEvents,
@@ -72,10 +72,10 @@ __all__ = [
     "FinetuneListEvents",
     "FinetuneDownloadResult",
     "FinetuneLRScheduler",
-    "
-    "
-    "
-    "
+    "LinearLRScheduler",
+    "LinearLRSchedulerArgs",
+    "CosineLRScheduler",
+    "CosineLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
{together-1.5.4 → together-1.5.6}/src/together/types/code_interpreter.py

@@ -7,6 +7,16 @@ from pydantic import Field
 from together.types.endpoints import TogetherJSONModel
 
 
+class FileInput(TogetherJSONModel):
+    """File input to be uploaded to the code interpreter session."""
+
+    name: str = Field(description="The name of the file.")
+    encoding: Literal["string", "base64"] = Field(
+        description="Encoding of the file content. Use 'string' for text files and 'base64' for binary files."
+    )
+    content: str = Field(description="The content of the file, encoded as specified.")
+
+
 class InterpreterOutput(TogetherJSONModel):
     """Base class for interpreter output types."""
 
@@ -40,6 +50,7 @@ class ExecuteResponse(TogetherJSONModel):
 
 
 __all__ = [
+    "FileInput",
     "InterpreterOutput",
     "ExecuteResponseData",
     "ExecuteResponse",
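Since `FileInput` is a regular pydantic model exported from `together.types.code_interpreter`, it can also be used directly to validate file payloads before handing them to the code interpreter. A small sketch of that validation (the file names and contents are placeholders):

```python
import base64

from together.types.code_interpreter import FileInput

# A text file uses encoding="string"
text_file = FileInput(name="notes.txt", encoding="string", content="hello world")

# A binary file is sent base64-encoded
png_bytes = b"\x89PNG\r\n\x1a\n"  # illustrative placeholder bytes
binary_file = FileInput(
    name="tiny.png",
    encoding="base64",
    content=base64.b64encode(png_bytes).decode(),
)

# model_dump() produces the plain dict shape passed through to the execute endpoint
print(text_file.model_dump())
```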
{together-1.5.4 → together-1.5.6}/src/together/types/finetune.py

@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 from enum import Enum
-from typing import List, Literal
+from typing import List, Literal
 
-from pydantic import StrictBool, Field,
+from pydantic import StrictBool, Field, field_validator
 
 from together.types.abstract import BaseModel
 from together.types.common import (
@@ -176,7 +176,7 @@ class FinetuneRequest(BaseModel):
     # training learning rate
     learning_rate: float
     # learning rate scheduler type and args
-    lr_scheduler:
+    lr_scheduler: LinearLRScheduler | CosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
     # max gradient norm
@@ -239,7 +239,7 @@ class FinetuneResponse(BaseModel):
     # training learning rate
     learning_rate: float | None = None
     # learning rate scheduler type and args
-    lr_scheduler:
+    lr_scheduler: LinearLRScheduler | CosineLRScheduler | EmptyLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
     # max gradient norm
@@ -345,11 +345,11 @@ class FinetuneTrainingLimits(BaseModel):
     lora_training: FinetuneLoraTrainingLimits | None = None
 
 
-class
+class LinearLRSchedulerArgs(BaseModel):
     min_lr_ratio: float | None = 0.0
 
 
-class
+class CosineLRSchedulerArgs(BaseModel):
     min_lr_ratio: float | None = 0.0
     num_cycles: float | None = 0.5
 
@@ -358,14 +358,20 @@ class FinetuneLRScheduler(BaseModel):
     lr_scheduler_type: str
 
 
-class
+class LinearLRScheduler(FinetuneLRScheduler):
     lr_scheduler_type: Literal["linear"] = "linear"
-
+    lr_scheduler_args: LinearLRSchedulerArgs | None = None
 
 
-class
+class CosineLRScheduler(FinetuneLRScheduler):
     lr_scheduler_type: Literal["cosine"] = "cosine"
-
+    lr_scheduler_args: CosineLRSchedulerArgs | None = None
+
+
+# placeholder for old fine-tuning jobs with no lr_scheduler_type specified
+class EmptyLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal[""]
+    lr_scheduler_args: None = None
 
 
 class FinetuneCheckpoint(BaseModel):
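A quick sketch of the new scheduler types in isolation, assuming they are imported straight from `together.types.finetune` where the diff defines them; note that `EmptyLRScheduler` exists only so that responses for older jobs with no scheduler type still parse:

```python
from together.types.finetune import (
    CosineLRScheduler,
    CosineLRSchedulerArgs,
    EmptyLRScheduler,
)

# lr_scheduler_type is a Literal discriminator on each subclass
cosine = CosineLRScheduler(
    lr_scheduler_args=CosineLRSchedulerArgs(min_lr_ratio=0.1, num_cycles=0.5)
)
print(cosine.lr_scheduler_type)  # "cosine"

# Older jobs may report an empty scheduler type; EmptyLRScheduler is the placeholder
# that keeps FinetuneResponse.lr_scheduler valid for them.
legacy = EmptyLRScheduler(lr_scheduler_type="")
print(legacy.lr_scheduler_args)  # None
```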