together 1.4.6__tar.gz → 1.5.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. {together-1.4.6 → together-1.5.3}/PKG-INFO +3 -4
  2. {together-1.4.6 → together-1.5.3}/README.md +1 -1
  3. {together-1.4.6 → together-1.5.3}/pyproject.toml +2 -2
  4. {together-1.4.6 → together-1.5.3}/src/together/cli/api/finetune.py +46 -9
  5. {together-1.4.6 → together-1.5.3}/src/together/resources/finetune.py +86 -22
  6. {together-1.4.6 → together-1.5.3}/src/together/types/__init__.py +7 -1
  7. {together-1.4.6 → together-1.5.3}/src/together/types/finetune.py +22 -8
  8. {together-1.4.6 → together-1.5.3}/LICENSE +0 -0
  9. {together-1.4.6 → together-1.5.3}/src/together/__init__.py +0 -0
  10. {together-1.4.6 → together-1.5.3}/src/together/abstract/__init__.py +0 -0
  11. {together-1.4.6 → together-1.5.3}/src/together/abstract/api_requestor.py +0 -0
  12. {together-1.4.6 → together-1.5.3}/src/together/cli/__init__.py +0 -0
  13. {together-1.4.6 → together-1.5.3}/src/together/cli/api/__init__.py +0 -0
  14. {together-1.4.6 → together-1.5.3}/src/together/cli/api/chat.py +0 -0
  15. {together-1.4.6 → together-1.5.3}/src/together/cli/api/completions.py +0 -0
  16. {together-1.4.6 → together-1.5.3}/src/together/cli/api/endpoints.py +0 -0
  17. {together-1.4.6 → together-1.5.3}/src/together/cli/api/files.py +0 -0
  18. {together-1.4.6 → together-1.5.3}/src/together/cli/api/images.py +0 -0
  19. {together-1.4.6 → together-1.5.3}/src/together/cli/api/models.py +0 -0
  20. {together-1.4.6 → together-1.5.3}/src/together/cli/api/utils.py +0 -0
  21. {together-1.4.6 → together-1.5.3}/src/together/cli/cli.py +0 -0
  22. {together-1.4.6 → together-1.5.3}/src/together/client.py +0 -0
  23. {together-1.4.6 → together-1.5.3}/src/together/constants.py +0 -0
  24. {together-1.4.6 → together-1.5.3}/src/together/error.py +0 -0
  25. {together-1.4.6 → together-1.5.3}/src/together/filemanager.py +0 -0
  26. {together-1.4.6 → together-1.5.3}/src/together/legacy/__init__.py +0 -0
  27. {together-1.4.6 → together-1.5.3}/src/together/legacy/base.py +0 -0
  28. {together-1.4.6 → together-1.5.3}/src/together/legacy/complete.py +0 -0
  29. {together-1.4.6 → together-1.5.3}/src/together/legacy/embeddings.py +0 -0
  30. {together-1.4.6 → together-1.5.3}/src/together/legacy/files.py +0 -0
  31. {together-1.4.6 → together-1.5.3}/src/together/legacy/finetune.py +0 -0
  32. {together-1.4.6 → together-1.5.3}/src/together/legacy/images.py +0 -0
  33. {together-1.4.6 → together-1.5.3}/src/together/legacy/models.py +0 -0
  34. {together-1.4.6 → together-1.5.3}/src/together/resources/__init__.py +0 -0
  35. {together-1.4.6 → together-1.5.3}/src/together/resources/audio/__init__.py +0 -0
  36. {together-1.4.6 → together-1.5.3}/src/together/resources/audio/speech.py +0 -0
  37. {together-1.4.6 → together-1.5.3}/src/together/resources/chat/__init__.py +0 -0
  38. {together-1.4.6 → together-1.5.3}/src/together/resources/chat/completions.py +0 -0
  39. {together-1.4.6 → together-1.5.3}/src/together/resources/completions.py +0 -0
  40. {together-1.4.6 → together-1.5.3}/src/together/resources/embeddings.py +0 -0
  41. {together-1.4.6 → together-1.5.3}/src/together/resources/endpoints.py +0 -0
  42. {together-1.4.6 → together-1.5.3}/src/together/resources/files.py +0 -0
  43. {together-1.4.6 → together-1.5.3}/src/together/resources/images.py +0 -0
  44. {together-1.4.6 → together-1.5.3}/src/together/resources/models.py +0 -0
  45. {together-1.4.6 → together-1.5.3}/src/together/resources/rerank.py +0 -0
  46. {together-1.4.6 → together-1.5.3}/src/together/together_response.py +0 -0
  47. {together-1.4.6 → together-1.5.3}/src/together/types/abstract.py +0 -0
  48. {together-1.4.6 → together-1.5.3}/src/together/types/audio_speech.py +0 -0
  49. {together-1.4.6 → together-1.5.3}/src/together/types/chat_completions.py +0 -0
  50. {together-1.4.6 → together-1.5.3}/src/together/types/common.py +0 -0
  51. {together-1.4.6 → together-1.5.3}/src/together/types/completions.py +0 -0
  52. {together-1.4.6 → together-1.5.3}/src/together/types/embeddings.py +0 -0
  53. {together-1.4.6 → together-1.5.3}/src/together/types/endpoints.py +0 -0
  54. {together-1.4.6 → together-1.5.3}/src/together/types/error.py +0 -0
  55. {together-1.4.6 → together-1.5.3}/src/together/types/files.py +0 -0
  56. {together-1.4.6 → together-1.5.3}/src/together/types/images.py +0 -0
  57. {together-1.4.6 → together-1.5.3}/src/together/types/models.py +0 -0
  58. {together-1.4.6 → together-1.5.3}/src/together/types/rerank.py +0 -0
  59. {together-1.4.6 → together-1.5.3}/src/together/utils/__init__.py +0 -0
  60. {together-1.4.6 → together-1.5.3}/src/together/utils/_log.py +0 -0
  61. {together-1.4.6 → together-1.5.3}/src/together/utils/api_helpers.py +0 -0
  62. {together-1.4.6 → together-1.5.3}/src/together/utils/files.py +0 -0
  63. {together-1.4.6 → together-1.5.3}/src/together/utils/tools.py +0 -0
  64. {together-1.4.6 → together-1.5.3}/src/together/version.py +0 -0
{together-1.4.6 → together-1.5.3}/PKG-INFO +3 -4

@@ -1,15 +1,14 @@
  Metadata-Version: 2.3
  Name: together
- Version: 1.4.6
+ Version: 1.5.3
  Summary: Python client for Together's Cloud Platform!
  License: Apache-2.0
  Author: Together AI
  Author-email: support@together.ai
- Requires-Python: >=3.9,<4.0
+ Requires-Python: >=3.10,<4.0
  Classifier: License :: OSI Approved :: Apache Software License
  Classifier: Operating System :: POSIX :: Linux
  Classifier: Programming Language :: Python :: 3
- Classifier: Programming Language :: Python :: 3.9
  Classifier: Programming Language :: Python :: 3.10
  Classifier: Programming Language :: Python :: 3.11
  Classifier: Programming Language :: Python :: 3.12
@@ -45,7 +44,7 @@ Description-Content-Type: text/markdown
  [![Discord](https://dcbadge.vercel.app/api/server/9Rk6sSeWEG?style=flat&compact=true)](https://discord.com/invite/9Rk6sSeWEG)
  [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/togethercompute.svg?style=social&label=Follow%20%40togethercompute)](https://twitter.com/togethercompute)

- The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.8+ applications with easy to use synchronous and asynchronous clients.
+ The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.
{together-1.4.6 → together-1.5.3}/README.md +1 -1

@@ -10,7 +10,7 @@
  [![Discord](https://dcbadge.vercel.app/api/server/9Rk6sSeWEG?style=flat&compact=true)](https://discord.com/invite/9Rk6sSeWEG)
  [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/togethercompute.svg?style=social&label=Follow%20%40togethercompute)](https://twitter.com/togethercompute)

- The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.8+ applications with easy to use synchronous and asynchronous clients.
+ The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.
{together-1.4.6 → together-1.5.3}/pyproject.toml +2 -2

@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"

  [tool.poetry]
  name = "together"
- version = "1.4.6"
+ version = "1.5.3"
  authors = [
  "Together AI <support@together.ai>"
  ]
@@ -28,7 +28,7 @@ repository = "https://github.com/togethercomputer/together-python"
  homepage = "https://github.com/togethercomputer/together-python"

  [tool.poetry.dependencies]
- python = "^3.9"
+ python = "^3.10"
  typer = ">=0.9,<0.16"
  requests = "^2.31.0"
  rich = "^13.8.1"
{together-1.4.6 → together-1.5.3}/src/together/cli/api/finetune.py +46 -9

@@ -58,30 +58,50 @@ def fine_tuning(ctx: click.Context) -> None:
  @fine_tuning.command()
  @click.pass_context
  @click.option(
- "--training-file", type=str, required=True, help="Training file ID from Files API"
+ "--training-file",
+ "-t",
+ type=str,
+ required=True,
+ help="Training file ID from Files API",
+ )
+ @click.option("--model", "-m", type=str, help="Base model name")
+ @click.option(
+ "--n-epochs", "-ne", type=int, default=1, help="Number of epochs to train for"
  )
- @click.option("--model", type=str, required=True, help="Base model name")
- @click.option("--n-epochs", type=int, default=1, help="Number of epochs to train for")
  @click.option(
  "--validation-file", type=str, default="", help="Validation file ID from Files API"
  )
  @click.option("--n-evals", type=int, default=0, help="Number of evaluation loops")
  @click.option(
- "--n-checkpoints", type=int, default=1, help="Number of checkpoints to save"
+ "--n-checkpoints", "-c", type=int, default=1, help="Number of checkpoints to save"
+ )
+ @click.option(
+ "--batch-size", "-b", type=INT_WITH_MAX, default="max", help="Train batch size"
+ )
+ @click.option("--learning-rate", "-lr", type=float, default=1e-5, help="Learning rate")
+ @click.option(
+ "--lr-scheduler-type",
+ type=click.Choice(["linear", "cosine"]),
+ default="linear",
+ help="Learning rate scheduler type",
  )
- @click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
- @click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
  @click.option(
  "--min-lr-ratio",
  type=float,
  default=0.0,
  help="The ratio of the final learning rate to the peak learning rate",
  )
+ @click.option(
+ "--scheduler-num-cycles",
+ type=float,
+ default=0.5,
+ help="Number or fraction of cycles for the cosine learning rate scheduler.",
+ )
  @click.option(
  "--warmup-ratio",
  type=float,
  default=0.0,
- help="Warmup ratio for learning rate scheduler.",
+ help="Warmup ratio for the learning rate scheduler.",
  )
  @click.option(
  "--max-grad-norm",
@@ -123,7 +143,11 @@ def fine_tuning(ctx: click.Context) -> None:
  help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
  )
  @click.option(
- "--suffix", type=str, default=None, help="Suffix for the fine-tuned model name"
+ "--suffix",
+ "-s",
+ type=str,
+ default=None,
+ help="Suffix for the fine-tuned model name",
  )
  @click.option("--wandb-api-key", type=str, default=None, help="Wandb API key")
  @click.option("--wandb-base-url", type=str, default=None, help="Wandb base URL")
@@ -162,7 +186,9 @@ def create(
  n_checkpoints: int,
  batch_size: int | Literal["max"],
  learning_rate: float,
+ lr_scheduler_type: Literal["linear", "cosine"],
  min_lr_ratio: float,
+ scheduler_num_cycles: float,
  warmup_ratio: float,
  max_grad_norm: float,
  weight_decay: float,
@@ -194,7 +220,9 @@ def create(
  n_checkpoints=n_checkpoints,
  batch_size=batch_size,
  learning_rate=learning_rate,
+ lr_scheduler_type=lr_scheduler_type,
  min_lr_ratio=min_lr_ratio,
+ scheduler_num_cycles=scheduler_num_cycles,
  warmup_ratio=warmup_ratio,
  max_grad_norm=max_grad_norm,
  weight_decay=weight_decay,
@@ -214,8 +242,15 @@ def create(
  from_checkpoint=from_checkpoint,
  )

+ if model is None and from_checkpoint is None:
+ raise click.BadParameter("You must specify either a model or a checkpoint")
+
+ model_name = model
+ if from_checkpoint is not None:
+ model_name = from_checkpoint.split(":")[0]
+
  model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits(
- model=model
+ model=model_name
  )

  if lora:
@@ -411,6 +446,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
  @click.argument("fine_tune_id", type=str, required=True)
  @click.option(
  "--output_dir",
+ "-o",
  type=click.Path(exists=True, file_okay=False, resolve_path=True),
  required=False,
  default=None,
@@ -418,6 +454,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
  )
  @click.option(
  "--checkpoint-step",
+ "-s",
  type=int,
  required=False,
  default=None,
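
The CLI changes above make --model optional: a job may instead start from --from-checkpoint, and the base model used for the limits lookup is derived from the checkpoint ID. A minimal sketch of that resolution logic, for illustration only (the helper name is hypothetical; the "text before the first colon" convention is inferred from the split(":")[0] call in the diff):

```python
# Hypothetical helper mirroring the CLI logic above; not part of the together package.
def resolve_model_name(model: str | None, from_checkpoint: str | None) -> str:
    if model is None and from_checkpoint is None:
        raise ValueError("You must specify either a model or a checkpoint")
    if from_checkpoint is not None:
        # The CLI takes everything before the first ":" as the source job/model ID.
        return from_checkpoint.split(":")[0]
    return model  # model is guaranteed non-None here
```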
{together-1.4.6 → together-1.5.3}/src/together/resources/finetune.py +86 -22

@@ -22,7 +22,10 @@ from together.types import (
  TogetherRequest,
  TrainingType,
  FinetuneLRScheduler,
+ FinetuneLinearLRScheduler,
+ FinetuneCosineLRScheduler,
  FinetuneLinearLRSchedulerArgs,
+ FinetuneCosineLRSchedulerArgs,
  TrainingMethodDPO,
  TrainingMethodSFT,
  FinetuneCheckpoint,
@@ -50,14 +53,16 @@ AVAILABLE_TRAINING_METHODS = {
  def createFinetuneRequest(
  model_limits: FinetuneTrainingLimits,
  training_file: str,
- model: str,
+ model: str | None = None,
  n_epochs: int = 1,
  validation_file: str | None = "",
  n_evals: int | None = 0,
  n_checkpoints: int | None = 1,
  batch_size: int | Literal["max"] = "max",
  learning_rate: float | None = 0.00001,
+ lr_scheduler_type: Literal["linear", "cosine"] = "linear",
  min_lr_ratio: float = 0.0,
+ scheduler_num_cycles: float = 0.5,
  warmup_ratio: float = 0.0,
  max_grad_norm: float = 1.0,
  weight_decay: float = 0.0,
@@ -77,6 +82,14 @@ def createFinetuneRequest(
  from_checkpoint: str | None = None,
  ) -> FinetuneRequest:

+ if model is not None and from_checkpoint is not None:
+ raise ValueError(
+ "You must specify either a model or a checkpoint to start a job from, not both"
+ )
+
+ if model is None and from_checkpoint is None:
+ raise ValueError("You must specify either a model or a checkpoint")
+
  if batch_size == "max":
  log_warn_once(
  "Starting from together>=1.3.0, "
@@ -86,6 +99,8 @@ def createFinetuneRequest(
  warmup_ratio = 0.0

  training_type: TrainingType = FullTrainingType()
+ max_batch_size: int = 0
+ min_batch_size: int = 0
  if lora:
  if model_limits.lora_training is None:
  raise ValueError("LoRA adapters are not supported for the selected model.")
@@ -98,18 +113,26 @@ def createFinetuneRequest(
  lora_trainable_modules=lora_trainable_modules,
  )

- batch_size = (
- batch_size
- if batch_size != "max"
- else model_limits.lora_training.max_batch_size
- )
+ max_batch_size = model_limits.lora_training.max_batch_size
+ min_batch_size = model_limits.lora_training.min_batch_size
+
  else:
  if model_limits.full_training is None:
  raise ValueError("Full training is not supported for the selected model.")
- batch_size = (
- batch_size
- if batch_size != "max"
- else model_limits.full_training.max_batch_size
+
+ max_batch_size = model_limits.full_training.max_batch_size
+ min_batch_size = model_limits.full_training.min_batch_size
+
+ batch_size = batch_size if batch_size != "max" else max_batch_size
+
+ if batch_size > max_batch_size:
+ raise ValueError(
+ "Requested batch size is higher that the maximum allowed value."
+ )
+
+ if batch_size < min_batch_size:
+ raise ValueError(
+ "Requested batch size is lower that the minimum allowed value."
  )

  if warmup_ratio > 1 or warmup_ratio < 0:
@@ -129,10 +152,22 @@ def createFinetuneRequest(
  f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
  )

- lrScheduler = FinetuneLRScheduler(
- lr_scheduler_type="linear",
- lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
- )
+ # Default to generic lr scheduler
+ lrScheduler: FinetuneLRScheduler = FinetuneLRScheduler(lr_scheduler_type="linear")
+
+ if lr_scheduler_type == "cosine":
+ if scheduler_num_cycles <= 0.0:
+ raise ValueError("Number of cycles should be greater than 0")
+
+ lrScheduler = FinetuneCosineLRScheduler(
+ lr_scheduler_args=FinetuneCosineLRSchedulerArgs(
+ min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
+ ),
+ )
+ else:
+ lrScheduler = FinetuneLinearLRScheduler(
+ lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+ )

  training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
  if training_method == "dpo":
@@ -237,14 +272,16 @@ class FineTuning:
  self,
  *,
  training_file: str,
- model: str,
+ model: str | None = None,
  n_epochs: int = 1,
  validation_file: str | None = "",
  n_evals: int | None = 0,
  n_checkpoints: int | None = 1,
  batch_size: int | Literal["max"] = "max",
  learning_rate: float | None = 0.00001,
+ lr_scheduler_type: Literal["linear", "cosine"] = "linear",
  min_lr_ratio: float = 0.0,
+ scheduler_num_cycles: float = 0.5,
  warmup_ratio: float = 0.0,
  max_grad_norm: float = 1.0,
  weight_decay: float = 0.0,
@@ -270,7 +307,7 @@ class FineTuning:

  Args:
  training_file (str): File-ID of a file uploaded to the Together API
- model (str): Name of the base model to run fine-tune job on
+ model (str, optional): Name of the base model to run fine-tune job on
  n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
  validation file (str, optional): File ID of a file uploaded to the Together API for validation.
  n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -279,9 +316,11 @@ class FineTuning:
  batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
  learning_rate (float, optional): Learning rate multiplier to use for training
  Defaults to 0.00001.
+ lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
  min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
  the learning rate scheduler. Defaults to 0.0.
- warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+ scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+ warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
  max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
  weight_decay (float, optional): Weight decay. Defaults to 0.0.
  lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -325,7 +364,16 @@ class FineTuning:
  )

  if model_limits is None:
- model_limits = self.get_model_limits(model=model)
+ # mypy doesn't understand that model or from_checkpoint is not None
+ if model is not None:
+ model_name = model
+ elif from_checkpoint is not None:
+ model_name = from_checkpoint.split(":")[0]
+ else:
+ # this branch is unreachable, but mypy doesn't know that
+ pass
+ model_limits = self.get_model_limits(model=model_name)
+
  finetune_request = createFinetuneRequest(
  model_limits=model_limits,
  training_file=training_file,
@@ -336,7 +384,9 @@ class FineTuning:
  n_checkpoints=n_checkpoints,
  batch_size=batch_size,
  learning_rate=learning_rate,
+ lr_scheduler_type=lr_scheduler_type,
  min_lr_ratio=min_lr_ratio,
+ scheduler_num_cycles=scheduler_num_cycles,
  warmup_ratio=warmup_ratio,
  max_grad_norm=max_grad_norm,
  weight_decay=weight_decay,
@@ -610,14 +660,16 @@ class AsyncFineTuning:
  self,
  *,
  training_file: str,
- model: str,
+ model: str | None = None,
  n_epochs: int = 1,
  validation_file: str | None = "",
  n_evals: int | None = 0,
  n_checkpoints: int | None = 1,
  batch_size: int | Literal["max"] = "max",
  learning_rate: float | None = 0.00001,
+ lr_scheduler_type: Literal["linear", "cosine"] = "linear",
  min_lr_ratio: float = 0.0,
+ scheduler_num_cycles: float = 0.5,
  warmup_ratio: float = 0.0,
  max_grad_norm: float = 1.0,
  weight_decay: float = 0.0,
@@ -643,7 +695,7 @@ class AsyncFineTuning:

  Args:
  training_file (str): File-ID of a file uploaded to the Together API
- model (str): Name of the base model to run fine-tune job on
+ model (str, optional): Name of the base model to run fine-tune job on
  n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
  validation file (str, optional): File ID of a file uploaded to the Together API for validation.
  n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -652,9 +704,11 @@ class AsyncFineTuning:
  batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
  learning_rate (float, optional): Learning rate multiplier to use for training
  Defaults to 0.00001.
+ lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
  min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
  the learning rate scheduler. Defaults to 0.0.
- warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+ scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+ warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
  max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
  weight_decay (float, optional): Weight decay. Defaults to 0.0.
  lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -698,7 +752,15 @@ class AsyncFineTuning:
  )

  if model_limits is None:
- model_limits = await self.get_model_limits(model=model)
+ # mypy doesn't understand that model or from_checkpoint is not None
+ if model is not None:
+ model_name = model
+ elif from_checkpoint is not None:
+ model_name = from_checkpoint.split(":")[0]
+ else:
+ # this branch is unreachable, but mypy doesn't know that
+ pass
+ model_limits = await self.get_model_limits(model=model_name)

  finetune_request = createFinetuneRequest(
  model_limits=model_limits,
@@ -710,7 +772,9 @@ class AsyncFineTuning:
  n_checkpoints=n_checkpoints,
  batch_size=batch_size,
  learning_rate=learning_rate,
+ lr_scheduler_type=lr_scheduler_type,
  min_lr_ratio=min_lr_ratio,
+ scheduler_num_cycles=scheduler_num_cycles,
  warmup_ratio=warmup_ratio,
  max_grad_norm=max_grad_norm,
  weight_decay=weight_decay,
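
In practice, the resource changes above surface as two new keyword arguments on fine_tuning.create (lr_scheduler_type and scheduler_num_cycles), matching the new --lr-scheduler-type and --scheduler-num-cycles CLI options. A hedged usage sketch follows; the training file ID and model name are placeholders, not values taken from this diff:

```python
# Sketch of calling the updated API; assumes TOGETHER_API_KEY is set in the environment.
from together import Together

client = Together()

job = client.fine_tuning.create(
    training_file="file-xxxxxxxxxxxx",  # placeholder Files API ID
    model="<base-model-name>",          # placeholder; alternatively pass from_checkpoint=...
    n_epochs=3,
    lr_scheduler_type="cosine",         # new in this release: "linear" (default) or "cosine"
    scheduler_num_cycles=0.5,           # new in this release: cycles for the cosine scheduler
    min_lr_ratio=0.1,
)
print(job.id)  # identifier of the launched fine-tuning job (field name assumed)
```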
{together-1.4.6 → together-1.5.3}/src/together/types/__init__.py +7 -1

@@ -34,11 +34,14 @@ from together.types.finetune import (
  TrainingMethodDPO,
  TrainingMethodSFT,
  FinetuneCheckpoint,
+ FinetuneCosineLRScheduler,
+ FinetuneCosineLRSchedulerArgs,
  FinetuneDownloadResult,
+ FinetuneLinearLRScheduler,
  FinetuneLinearLRSchedulerArgs,
+ FinetuneLRScheduler,
  FinetuneList,
  FinetuneListEvents,
- FinetuneLRScheduler,
  FinetuneRequest,
  FinetuneResponse,
  FinetuneTrainingLimits,
@@ -69,7 +72,10 @@ __all__ = [
  "FinetuneListEvents",
  "FinetuneDownloadResult",
  "FinetuneLRScheduler",
+ "FinetuneLinearLRScheduler",
  "FinetuneLinearLRSchedulerArgs",
+ "FinetuneCosineLRScheduler",
+ "FinetuneCosineLRSchedulerArgs",
  "FileRequest",
  "FileResponse",
  "FileList",
{together-1.4.6 → together-1.5.3}/src/together/types/finetune.py +22 -8

@@ -1,9 +1,9 @@
  from __future__ import annotations

  from enum import Enum
- from typing import List, Literal
+ from typing import List, Literal, Union

- from pydantic import StrictBool, Field, validator, field_validator
+ from pydantic import StrictBool, Field, validator, field_validator, ValidationInfo

  from together.types.abstract import BaseModel
  from together.types.common import (
@@ -170,13 +170,13 @@ class FinetuneRequest(BaseModel):
  # validation file id
  validation_file: str | None = None
  # base model string
- model: str
+ model: str | None = None
  # number of epochs to train for
  n_epochs: int
  # training learning rate
  learning_rate: float
  # learning rate scheduler type and args
- lr_scheduler: FinetuneLRScheduler | None = None
+ lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
  # learning rate warmup ratio
  warmup_ratio: float
  # max gradient norm
@@ -239,7 +239,7 @@ class FinetuneResponse(BaseModel):
  # training learning rate
  learning_rate: float | None = None
  # learning rate scheduler type and args
- lr_scheduler: FinetuneLRScheduler | None = None
+ lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
  # learning rate warmup ratio
  warmup_ratio: float | None = None
  # max gradient norm
@@ -345,13 +345,27 @@ class FinetuneTrainingLimits(BaseModel):
  lora_training: FinetuneLoraTrainingLimits | None = None


+ class FinetuneLinearLRSchedulerArgs(BaseModel):
+ min_lr_ratio: float | None = 0.0
+
+
+ class FinetuneCosineLRSchedulerArgs(BaseModel):
+ min_lr_ratio: float | None = 0.0
+ num_cycles: float | None = 0.5
+
+
  class FinetuneLRScheduler(BaseModel):
  lr_scheduler_type: str
- lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None


- class FinetuneLinearLRSchedulerArgs(BaseModel):
- min_lr_ratio: float | None = 0.0
+ class FinetuneLinearLRScheduler(FinetuneLRScheduler):
+ lr_scheduler_type: Literal["linear"] = "linear"
+ lr_scheduler: FinetuneLinearLRSchedulerArgs | None = None
+
+
+ class FinetuneCosineLRScheduler(FinetuneLRScheduler):
+ lr_scheduler_type: Literal["cosine"] = "cosine"
+ lr_scheduler: FinetuneCosineLRSchedulerArgs | None = None


  class FinetuneCheckpoint(BaseModel):
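
With the scheduler hierarchy above, the lr_scheduler field on FinetuneRequest and FinetuneResponse is now a union of the linear and cosine variants. A small, hypothetical inspection helper that only touches field names visible in this diff:

```python
from together.types import FinetuneCosineLRScheduler, FinetuneResponse


def describe_scheduler(job: FinetuneResponse) -> str:
    # lr_scheduler may be absent, a linear scheduler, or a cosine scheduler.
    sched = job.lr_scheduler
    if sched is None:
        return "no scheduler reported"
    if isinstance(sched, FinetuneCosineLRScheduler):
        return "cosine schedule"
    return f"{sched.lr_scheduler_type} schedule"
```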