together 1.4.5__tar.gz → 1.5.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {together-1.4.5 → together-1.5.2}/PKG-INFO +3 -4
  2. {together-1.4.5 → together-1.5.2}/README.md +1 -1
  3. {together-1.4.5 → together-1.5.2}/pyproject.toml +2 -2
  4. {together-1.4.5 → together-1.5.2}/src/together/cli/api/endpoints.py +20 -1
  5. {together-1.4.5 → together-1.5.2}/src/together/cli/api/finetune.py +46 -9
  6. {together-1.4.5 → together-1.5.2}/src/together/cli/api/models.py +20 -7
  7. {together-1.4.5 → together-1.5.2}/src/together/resources/endpoints.py +24 -4
  8. {together-1.4.5 → together-1.5.2}/src/together/resources/finetune.py +70 -13
  9. together-1.5.2/src/together/resources/models.py +134 -0
  10. {together-1.4.5 → together-1.5.2}/src/together/types/__init__.py +7 -1
  11. {together-1.4.5 → together-1.5.2}/src/together/types/finetune.py +21 -7
  12. together-1.4.5/src/together/resources/models.py +0 -75
  13. {together-1.4.5 → together-1.5.2}/LICENSE +0 -0
  14. {together-1.4.5 → together-1.5.2}/src/together/__init__.py +0 -0
  15. {together-1.4.5 → together-1.5.2}/src/together/abstract/__init__.py +0 -0
  16. {together-1.4.5 → together-1.5.2}/src/together/abstract/api_requestor.py +0 -0
  17. {together-1.4.5 → together-1.5.2}/src/together/cli/__init__.py +0 -0
  18. {together-1.4.5 → together-1.5.2}/src/together/cli/api/__init__.py +0 -0
  19. {together-1.4.5 → together-1.5.2}/src/together/cli/api/chat.py +0 -0
  20. {together-1.4.5 → together-1.5.2}/src/together/cli/api/completions.py +0 -0
  21. {together-1.4.5 → together-1.5.2}/src/together/cli/api/files.py +0 -0
  22. {together-1.4.5 → together-1.5.2}/src/together/cli/api/images.py +0 -0
  23. {together-1.4.5 → together-1.5.2}/src/together/cli/api/utils.py +0 -0
  24. {together-1.4.5 → together-1.5.2}/src/together/cli/cli.py +0 -0
  25. {together-1.4.5 → together-1.5.2}/src/together/client.py +0 -0
  26. {together-1.4.5 → together-1.5.2}/src/together/constants.py +0 -0
  27. {together-1.4.5 → together-1.5.2}/src/together/error.py +0 -0
  28. {together-1.4.5 → together-1.5.2}/src/together/filemanager.py +0 -0
  29. {together-1.4.5 → together-1.5.2}/src/together/legacy/__init__.py +0 -0
  30. {together-1.4.5 → together-1.5.2}/src/together/legacy/base.py +0 -0
  31. {together-1.4.5 → together-1.5.2}/src/together/legacy/complete.py +0 -0
  32. {together-1.4.5 → together-1.5.2}/src/together/legacy/embeddings.py +0 -0
  33. {together-1.4.5 → together-1.5.2}/src/together/legacy/files.py +0 -0
  34. {together-1.4.5 → together-1.5.2}/src/together/legacy/finetune.py +0 -0
  35. {together-1.4.5 → together-1.5.2}/src/together/legacy/images.py +0 -0
  36. {together-1.4.5 → together-1.5.2}/src/together/legacy/models.py +0 -0
  37. {together-1.4.5 → together-1.5.2}/src/together/resources/__init__.py +0 -0
  38. {together-1.4.5 → together-1.5.2}/src/together/resources/audio/__init__.py +0 -0
  39. {together-1.4.5 → together-1.5.2}/src/together/resources/audio/speech.py +0 -0
  40. {together-1.4.5 → together-1.5.2}/src/together/resources/chat/__init__.py +0 -0
  41. {together-1.4.5 → together-1.5.2}/src/together/resources/chat/completions.py +0 -0
  42. {together-1.4.5 → together-1.5.2}/src/together/resources/completions.py +0 -0
  43. {together-1.4.5 → together-1.5.2}/src/together/resources/embeddings.py +0 -0
  44. {together-1.4.5 → together-1.5.2}/src/together/resources/files.py +0 -0
  45. {together-1.4.5 → together-1.5.2}/src/together/resources/images.py +0 -0
  46. {together-1.4.5 → together-1.5.2}/src/together/resources/rerank.py +0 -0
  47. {together-1.4.5 → together-1.5.2}/src/together/together_response.py +0 -0
  48. {together-1.4.5 → together-1.5.2}/src/together/types/abstract.py +0 -0
  49. {together-1.4.5 → together-1.5.2}/src/together/types/audio_speech.py +0 -0
  50. {together-1.4.5 → together-1.5.2}/src/together/types/chat_completions.py +0 -0
  51. {together-1.4.5 → together-1.5.2}/src/together/types/common.py +0 -0
  52. {together-1.4.5 → together-1.5.2}/src/together/types/completions.py +0 -0
  53. {together-1.4.5 → together-1.5.2}/src/together/types/embeddings.py +0 -0
  54. {together-1.4.5 → together-1.5.2}/src/together/types/endpoints.py +0 -0
  55. {together-1.4.5 → together-1.5.2}/src/together/types/error.py +0 -0
  56. {together-1.4.5 → together-1.5.2}/src/together/types/files.py +0 -0
  57. {together-1.4.5 → together-1.5.2}/src/together/types/images.py +0 -0
  58. {together-1.4.5 → together-1.5.2}/src/together/types/models.py +0 -0
  59. {together-1.4.5 → together-1.5.2}/src/together/types/rerank.py +0 -0
  60. {together-1.4.5 → together-1.5.2}/src/together/utils/__init__.py +0 -0
  61. {together-1.4.5 → together-1.5.2}/src/together/utils/_log.py +0 -0
  62. {together-1.4.5 → together-1.5.2}/src/together/utils/api_helpers.py +0 -0
  63. {together-1.4.5 → together-1.5.2}/src/together/utils/files.py +0 -0
  64. {together-1.4.5 → together-1.5.2}/src/together/utils/tools.py +0 -0
  65. {together-1.4.5 → together-1.5.2}/src/together/version.py +0 -0
--- together-1.4.5/PKG-INFO
+++ together-1.5.2/PKG-INFO
@@ -1,15 +1,14 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.4.5
+Version: 1.5.2
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
 Author-email: support@together.ai
-Requires-Python: >=3.9,<4.0
+Requires-Python: >=3.10,<4.0
 Classifier: License :: OSI Approved :: Apache Software License
 Classifier: Operating System :: POSIX :: Linux
 Classifier: Programming Language :: Python :: 3
-Classifier: Programming Language :: Python :: 3.9
 Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
@@ -45,7 +44,7 @@ Description-Content-Type: text/markdown
 [![Discord](https://dcbadge.vercel.app/api/server/9Rk6sSeWEG?style=flat&compact=true)](https://discord.com/invite/9Rk6sSeWEG)
 [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/togethercompute.svg?style=social&label=Follow%20%40togethercompute)](https://twitter.com/togethercompute)
 
-The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.8+ applications with easy to use synchronous and asynchronous clients.
+The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.
 
 
 
--- together-1.4.5/README.md
+++ together-1.5.2/README.md
@@ -10,7 +10,7 @@
 [![Discord](https://dcbadge.vercel.app/api/server/9Rk6sSeWEG?style=flat&compact=true)](https://discord.com/invite/9Rk6sSeWEG)
 [![Twitter](https://img.shields.io/twitter/url/https/twitter.com/togethercompute.svg?style=social&label=Follow%20%40togethercompute)](https://twitter.com/togethercompute)
 
-The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.8+ applications with easy to use synchronous and asynchronous clients.
+The [Together Python API Library](https://pypi.org/project/together/) is the official Python client for Together's API platform, providing a convenient way for interacting with the REST APIs and enables easy integrations with Python 3.10+ applications with easy to use synchronous and asynchronous clients.
 
 
 
--- together-1.4.5/pyproject.toml
+++ together-1.5.2/pyproject.toml
@@ -12,7 +12,7 @@ build-backend = "poetry.masonry.api"
 
 [tool.poetry]
 name = "together"
-version = "1.4.5"
+version = "1.5.2"
 authors = [
     "Together AI <support@together.ai>"
 ]
@@ -28,7 +28,7 @@ repository = "https://github.com/togethercomputer/together-python"
 homepage = "https://github.com/togethercomputer/together-python"
 
 [tool.poetry.dependencies]
-python = "^3.9"
+python = "^3.10"
 typer = ">=0.9,<0.16"
 requests = "^2.31.0"
 rich = "^13.8.1"
--- together-1.4.5/src/together/cli/api/endpoints.py
+++ together-1.5.2/src/together/cli/api/endpoints.py
@@ -127,6 +127,11 @@ def endpoints(ctx: click.Context) -> None:
     is_flag=True,
     help="Create the endpoint in STOPPED state instead of auto-starting it",
 )
+@click.option(
+    "--inactive-timeout",
+    type=int,
+    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
+)
 @click.option(
     "--wait",
     is_flag=True,
@@ -146,6 +151,7 @@ def create(
     no_prompt_cache: bool,
     no_speculative_decoding: bool,
     no_auto_start: bool,
+    inactive_timeout: int | None,
     wait: bool,
 ) -> None:
     """Create a new dedicated inference endpoint."""
@@ -170,6 +176,7 @@ def create(
             disable_prompt_cache=no_prompt_cache,
             disable_speculative_decoding=no_speculative_decoding,
             state="STOPPED" if no_auto_start else "STARTED",
+            inactive_timeout=inactive_timeout,
         )
     except InvalidRequestError as e:
         print_api_error(e)
@@ -194,6 +201,8 @@ def create(
         click.echo(" Speculative decoding: disabled", err=True)
     if no_auto_start:
         click.echo(" Auto-start: disabled", err=True)
+    if inactive_timeout is not None:
+        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
 
     click.echo(f"Endpoint created successfully, id: {response.id}", err=True)
 
@@ -371,6 +380,11 @@ def list(
     type=int,
     help="New maximum number of replicas to scale up to",
 )
+@click.option(
+    "--inactive-timeout",
+    type=int,
+    help="Number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable.",
+)
 @click.pass_obj
 @handle_api_errors
 def update(
@@ -379,9 +393,10 @@ def update(
     display_name: str | None,
     min_replicas: int | None,
     max_replicas: int | None,
+    inactive_timeout: int | None,
 ) -> None:
     """Update a dedicated inference endpoint's configuration."""
-    if not any([display_name, min_replicas, max_replicas]):
+    if not any([display_name, min_replicas, max_replicas, inactive_timeout]):
         click.echo("Error: At least one update option must be specified", err=True)
         sys.exit(1)
 
@@ -400,6 +415,8 @@ def update(
     if min_replicas is not None and max_replicas is not None:
         kwargs["min_replicas"] = min_replicas
         kwargs["max_replicas"] = max_replicas
+    if inactive_timeout is not None:
+        kwargs["inactive_timeout"] = inactive_timeout
 
     _response = client.endpoints.update(endpoint_id, **kwargs)
 
@@ -410,6 +427,8 @@ def update(
     if min_replicas is not None and max_replicas is not None:
         click.echo(f" Min replicas: {min_replicas}", err=True)
         click.echo(f" Max replicas: {max_replicas}", err=True)
+    if inactive_timeout is not None:
+        click.echo(f" Inactive timeout: {inactive_timeout} minutes", err=True)
 
     click.echo("Successfully updated endpoint", err=True)
     click.echo(endpoint_id)
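
The hunks above thread the new `--inactive-timeout` option through both `together endpoints create` and `together endpoints update`. A minimal sketch of the same setting through the Python client (the model, hardware, and replica argument names are assumptions inferred from the resource-layer diff further down, and the IDs are placeholders):

from together import Together

client = Together()  # assumes TOGETHER_API_KEY is set in the environment

# Create an endpoint that stops itself after 30 idle minutes; per the new
# help text, passing 0 would disable the automatic stop.
endpoint = client.endpoints.create(
    model="example-org/example-model",  # placeholder model ID
    hardware="example-hardware-id",     # placeholder hardware ID
    min_replicas=1,
    max_replicas=1,
    inactive_timeout=30,
)
print(endpoint.id)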
--- together-1.4.5/src/together/cli/api/finetune.py
+++ together-1.5.2/src/together/cli/api/finetune.py
@@ -58,30 +58,50 @@ def fine_tuning(ctx: click.Context) -> None:
 @fine_tuning.command()
 @click.pass_context
 @click.option(
-    "--training-file", type=str, required=True, help="Training file ID from Files API"
+    "--training-file",
+    "-t",
+    type=str,
+    required=True,
+    help="Training file ID from Files API",
+)
+@click.option("--model", "-m", type=str, help="Base model name")
+@click.option(
+    "--n-epochs", "-ne", type=int, default=1, help="Number of epochs to train for"
 )
-@click.option("--model", type=str, required=True, help="Base model name")
-@click.option("--n-epochs", type=int, default=1, help="Number of epochs to train for")
 @click.option(
     "--validation-file", type=str, default="", help="Validation file ID from Files API"
 )
 @click.option("--n-evals", type=int, default=0, help="Number of evaluation loops")
 @click.option(
-    "--n-checkpoints", type=int, default=1, help="Number of checkpoints to save"
+    "--n-checkpoints", "-c", type=int, default=1, help="Number of checkpoints to save"
+)
+@click.option(
+    "--batch-size", "-b", type=INT_WITH_MAX, default="max", help="Train batch size"
+)
+@click.option("--learning-rate", "-lr", type=float, default=1e-5, help="Learning rate")
+@click.option(
+    "--lr-scheduler-type",
+    type=click.Choice(["linear", "cosine"]),
+    default="linear",
+    help="Learning rate scheduler type",
 )
-@click.option("--batch-size", type=INT_WITH_MAX, default="max", help="Train batch size")
-@click.option("--learning-rate", type=float, default=1e-5, help="Learning rate")
 @click.option(
     "--min-lr-ratio",
     type=float,
     default=0.0,
     help="The ratio of the final learning rate to the peak learning rate",
 )
+@click.option(
+    "--scheduler-num-cycles",
+    type=float,
+    default=0.5,
+    help="Number or fraction of cycles for the cosine learning rate scheduler.",
+)
 @click.option(
     "--warmup-ratio",
     type=float,
     default=0.0,
-    help="Warmup ratio for learning rate scheduler.",
+    help="Warmup ratio for the learning rate scheduler.",
 )
 @click.option(
     "--max-grad-norm",
@@ -123,7 +143,11 @@ def fine_tuning(ctx: click.Context) -> None:
     help="Beta parameter for DPO training (only used when '--training-method' is 'dpo')",
 )
 @click.option(
-    "--suffix", type=str, default=None, help="Suffix for the fine-tuned model name"
+    "--suffix",
+    "-s",
+    type=str,
+    default=None,
+    help="Suffix for the fine-tuned model name",
 )
 @click.option("--wandb-api-key", type=str, default=None, help="Wandb API key")
 @click.option("--wandb-base-url", type=str, default=None, help="Wandb base URL")
@@ -162,7 +186,9 @@ def create(
     n_checkpoints: int,
     batch_size: int | Literal["max"],
     learning_rate: float,
+    lr_scheduler_type: Literal["linear", "cosine"],
     min_lr_ratio: float,
+    scheduler_num_cycles: float,
     warmup_ratio: float,
     max_grad_norm: float,
     weight_decay: float,
@@ -194,7 +220,9 @@ def create(
         n_checkpoints=n_checkpoints,
         batch_size=batch_size,
         learning_rate=learning_rate,
+        lr_scheduler_type=lr_scheduler_type,
         min_lr_ratio=min_lr_ratio,
+        scheduler_num_cycles=scheduler_num_cycles,
         warmup_ratio=warmup_ratio,
         max_grad_norm=max_grad_norm,
         weight_decay=weight_decay,
@@ -214,8 +242,15 @@ def create(
         from_checkpoint=from_checkpoint,
     )
 
+    if model is None and from_checkpoint is None:
+        raise click.BadParameter("You must specify either a model or a checkpoint")
+
+    model_name = model
+    if from_checkpoint is not None:
+        model_name = from_checkpoint.split(":")[0]
+
     model_limits: FinetuneTrainingLimits = client.fine_tuning.get_model_limits(
-        model=model
+        model=model_name
     )
 
     if lora:
@@ -411,6 +446,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
 @click.argument("fine_tune_id", type=str, required=True)
 @click.option(
     "--output_dir",
+    "-o",
     type=click.Path(exists=True, file_okay=False, resolve_path=True),
     required=False,
     default=None,
@@ -418,6 +454,7 @@ def list_checkpoints(ctx: click.Context, fine_tune_id: str) -> None:
 )
 @click.option(
     "--checkpoint-step",
+    "-s",
     type=int,
     required=False,
     default=None,
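
Beyond the new short aliases (`-t`, `-m`, `-ne`, `-c`, `-b`, `-lr`, `-s`, `-o`), the key behavioral changes here are that `--model` is no longer required (a job may start from `--from-checkpoint` instead) and that a cosine learning-rate scheduler can be selected. A sketch of the equivalent call through the Python client (the file and model IDs are placeholders):

from together import Together

client = Together()

# scheduler_num_cycles is the number (or fraction) of cosine cycles and must
# be greater than 0, per the validation added in resources/finetune.py below.
job = client.fine_tuning.create(
    training_file="file-0000000000",         # placeholder file ID
    model="example-org/example-base-model",  # placeholder model name
    n_epochs=3,
    lr_scheduler_type="cosine",
    scheduler_num_cycles=0.5,
    min_lr_ratio=0.1,
)
print(job.id)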
--- together-1.4.5/src/together/cli/api/models.py
+++ together-1.5.2/src/together/cli/api/models.py
@@ -1,4 +1,4 @@
-from textwrap import wrap
+import json as json_lib
 
 import click
 from tabulate import tabulate
@@ -15,12 +15,22 @@ def models(ctx: click.Context) -> None:
 
 
 @models.command()
+@click.option(
+    "--type",
+    type=click.Choice(["dedicated"]),
+    help="Filter models by type (dedicated: models that can be deployed as dedicated endpoints)",
+)
+@click.option(
+    "--json",
+    is_flag=True,
+    help="Output in JSON format",
+)
 @click.pass_context
-def list(ctx: click.Context) -> None:
+def list(ctx: click.Context, type: str | None, json: bool) -> None:
     """List models"""
     client: Together = ctx.obj
 
-    response = client.models.list()
+    response = client.models.list(dedicated=(type == "dedicated"))
 
     display_list = []
 
@@ -28,15 +38,18 @@ def list(ctx: click.Context) -> None:
     for model in response:
         display_list.append(
             {
-                "ID": "\n".join(wrap(model.id or "", width=30)),
-                "Name": "\n".join(wrap(model.display_name or "", width=30)),
+                "ID": model.id,
+                "Name": model.display_name,
                 "Organization": model.organization,
                 "Type": model.type,
                 "Context Length": model.context_length,
-                "License": "\n".join(wrap(model.license or "", width=30)),
+                "License": model.license,
                 "Input per 1M token": model.pricing.input,
                 "Output per 1M token": model.pricing.output,
             }
         )
 
-    click.echo(tabulate(display_list, headers="keys", tablefmt="grid"))
+    if json:
+        click.echo(json_lib.dumps(display_list, indent=2))
+    else:
+        click.echo(tabulate(display_list, headers="keys", tablefmt="plain"))
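
`together models list` now accepts `--type dedicated`, which maps to the new `client.models.list(dedicated=True)`, plus a `--json` flag, and the wrapped `grid` table becomes a `plain` one. A sketch of the same lookup through the client, reproducing the CLI's JSON shape for a few fields:

import json

from together import Together

client = Together()

# dedicated=True returns only models deployable as dedicated endpoints;
# dedicated=False (the default) returns everything.
models = client.models.list(dedicated=True)

rows = [
    {"ID": m.id, "Name": m.display_name, "Context Length": m.context_length}
    for m in models[:3]
]
print(json.dumps(rows, indent=2))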
--- together-1.4.5/src/together/resources/endpoints.py
+++ together-1.5.2/src/together/resources/endpoints.py
@@ -59,6 +59,7 @@ class Endpoints:
         disable_prompt_cache: bool = False,
         disable_speculative_decoding: bool = False,
         state: Literal["STARTED", "STOPPED"] = "STARTED",
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Create a new dedicated endpoint.
@@ -72,6 +73,7 @@
             disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
             disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
             state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -80,7 +82,7 @@
             client=self._client,
         )
 
-        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+        data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
             "model": model,
             "hardware": hardware,
             "autoscaling": {
@@ -95,6 +97,9 @@
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = requestor.request(
             options=TogetherRequest(
                 method="POST",
@@ -161,6 +166,7 @@
         max_replicas: Optional[int] = None,
         state: Optional[Literal["STARTED", "STOPPED"]] = None,
         display_name: Optional[str] = None,
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Update an endpoint's configuration.
@@ -171,6 +177,7 @@
             max_replicas (int, optional): The maximum number of replicas to scale up to
             state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
             display_name (str, optional): A human-readable name for the endpoint
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -179,7 +186,7 @@
             client=self._client,
         )
 
-        data: Dict[str, Union[str, Dict[str, int]]] = {}
+        data: Dict[str, Union[str, Dict[str, int], int]] = {}
 
         if min_replicas is not None or max_replicas is not None:
             current_min = min_replicas
@@ -200,6 +207,9 @@
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = requestor.request(
             options=TogetherRequest(
                 method="PATCH",
@@ -297,6 +307,7 @@ class AsyncEndpoints:
         disable_prompt_cache: bool = False,
         disable_speculative_decoding: bool = False,
         state: Literal["STARTED", "STOPPED"] = "STARTED",
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Create a new dedicated endpoint.
@@ -310,6 +321,7 @@
             disable_prompt_cache (bool, optional): Whether to disable the prompt cache. Defaults to False.
             disable_speculative_decoding (bool, optional): Whether to disable speculative decoding. Defaults to False.
             state (str, optional): The desired state of the endpoint. Defaults to "STARTED".
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -318,7 +330,7 @@
             client=self._client,
         )
 
-        data: Dict[str, Union[str, bool, Dict[str, int]]] = {
+        data: Dict[str, Union[str, bool, Dict[str, int], int]] = {
             "model": model,
             "hardware": hardware,
             "autoscaling": {
@@ -333,6 +345,9 @@
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = await requestor.arequest(
             options=TogetherRequest(
                 method="POST",
@@ -399,6 +414,7 @@
         max_replicas: Optional[int] = None,
         state: Optional[Literal["STARTED", "STOPPED"]] = None,
         display_name: Optional[str] = None,
+        inactive_timeout: Optional[int] = None,
     ) -> DedicatedEndpoint:
         """
         Update an endpoint's configuration.
@@ -409,6 +425,7 @@
             max_replicas (int, optional): The maximum number of replicas to scale up to
             state (str, optional): The desired state of the endpoint ("STARTED" or "STOPPED")
             display_name (str, optional): A human-readable name for the endpoint
+            inactive_timeout (int, optional): The number of minutes of inactivity after which the endpoint will be automatically stopped. Set to 0 to disable automatic timeout.
 
         Returns:
             DedicatedEndpoint: Object containing endpoint information
@@ -417,7 +434,7 @@
             client=self._client,
         )
 
-        data: Dict[str, Union[str, Dict[str, int]]] = {}
+        data: Dict[str, Union[str, Dict[str, int], int]] = {}
 
         if min_replicas is not None or max_replicas is not None:
             current_min = min_replicas
@@ -438,6 +455,9 @@
         if display_name is not None:
             data["display_name"] = display_name
 
+        if inactive_timeout is not None:
+            data["inactive_timeout"] = inactive_timeout
+
         response, _, _ = await requestor.arequest(
             options=TogetherRequest(
                 method="PATCH",
--- together-1.4.5/src/together/resources/finetune.py
+++ together-1.5.2/src/together/resources/finetune.py
@@ -22,7 +22,10 @@ from together.types import (
     TogetherRequest,
     TrainingType,
     FinetuneLRScheduler,
+    FinetuneLinearLRScheduler,
+    FinetuneCosineLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    FinetuneCosineLRSchedulerArgs,
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
@@ -50,14 +53,16 @@
 def createFinetuneRequest(
     model_limits: FinetuneTrainingLimits,
     training_file: str,
-    model: str,
+    model: str | None = None,
     n_epochs: int = 1,
     validation_file: str | None = "",
     n_evals: int | None = 0,
     n_checkpoints: int | None = 1,
     batch_size: int | Literal["max"] = "max",
     learning_rate: float | None = 0.00001,
+    lr_scheduler_type: Literal["linear", "cosine"] = "linear",
     min_lr_ratio: float = 0.0,
+    scheduler_num_cycles: float = 0.5,
     warmup_ratio: float = 0.0,
     max_grad_norm: float = 1.0,
     weight_decay: float = 0.0,
@@ -77,6 +82,11 @@
     from_checkpoint: str | None = None,
 ) -> FinetuneRequest:
 
+    if model is not None and from_checkpoint is not None:
+        raise ValueError(
+            "You must specify either a model or a checkpoint to start a job from, not both"
+        )
+
     if batch_size == "max":
         log_warn_once(
             "Starting from together>=1.3.0, "
@@ -129,10 +139,22 @@
             f"training_method must be one of {', '.join(AVAILABLE_TRAINING_METHODS)}"
         )
 
-    lrScheduler = FinetuneLRScheduler(
-        lr_scheduler_type="linear",
-        lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
-    )
+    # Default to generic lr scheduler
+    lrScheduler: FinetuneLRScheduler = FinetuneLRScheduler(lr_scheduler_type="linear")
+
+    if lr_scheduler_type == "cosine":
+        if scheduler_num_cycles <= 0.0:
+            raise ValueError("Number of cycles should be greater than 0")
+
+        lrScheduler = FinetuneCosineLRScheduler(
+            lr_scheduler_args=FinetuneCosineLRSchedulerArgs(
+                min_lr_ratio=min_lr_ratio, num_cycles=scheduler_num_cycles
+            ),
+        )
+    else:
+        lrScheduler = FinetuneLinearLRScheduler(
+            lr_scheduler_args=FinetuneLinearLRSchedulerArgs(min_lr_ratio=min_lr_ratio),
+        )
 
     training_method_cls: TrainingMethodSFT | TrainingMethodDPO = TrainingMethodSFT()
     if training_method == "dpo":
@@ -237,14 +259,16 @@ class FineTuning:
         self,
         *,
         training_file: str,
-        model: str,
+        model: str | None = None,
         n_epochs: int = 1,
         validation_file: str | None = "",
         n_evals: int | None = 0,
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
+        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
         min_lr_ratio: float = 0.0,
+        scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -270,7 +294,7 @@
 
         Args:
             training_file (str): File-ID of a file uploaded to the Together API
-            model (str): Name of the base model to run fine-tune job on
+            model (str, optional): Name of the base model to run fine-tune job on
             n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
             validation file (str, optional): File ID of a file uploaded to the Together API for validation.
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -279,9 +303,11 @@
             batch_size (int or "max"): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
-            warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+            warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
             max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -320,12 +346,24 @@
             FinetuneResponse: Object containing information about fine-tuning job.
         """
 
+        if model is None and from_checkpoint is None:
+            raise ValueError("You must specify either a model or a checkpoint")
+
         requestor = api_requestor.APIRequestor(
             client=self._client,
         )
 
         if model_limits is None:
-            model_limits = self.get_model_limits(model=model)
+            # mypy doesn't understand that model or from_checkpoint is not None
+            if model is not None:
+                model_name = model
+            elif from_checkpoint is not None:
+                model_name = from_checkpoint.split(":")[0]
+            else:
+                # this branch is unreachable, but mypy doesn't know that
+                pass
+            model_limits = self.get_model_limits(model=model_name)
+
         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
             training_file=training_file,
@@ -336,7 +374,9 @@
             n_checkpoints=n_checkpoints,
             batch_size=batch_size,
             learning_rate=learning_rate,
+            lr_scheduler_type=lr_scheduler_type,
             min_lr_ratio=min_lr_ratio,
+            scheduler_num_cycles=scheduler_num_cycles,
             warmup_ratio=warmup_ratio,
             max_grad_norm=max_grad_norm,
             weight_decay=weight_decay,
@@ -610,14 +650,16 @@ class AsyncFineTuning:
         self,
         *,
         training_file: str,
-        model: str,
+        model: str | None = None,
         n_epochs: int = 1,
         validation_file: str | None = "",
         n_evals: int | None = 0,
         n_checkpoints: int | None = 1,
         batch_size: int | Literal["max"] = "max",
         learning_rate: float | None = 0.00001,
+        lr_scheduler_type: Literal["linear", "cosine"] = "linear",
         min_lr_ratio: float = 0.0,
+        scheduler_num_cycles: float = 0.5,
         warmup_ratio: float = 0.0,
         max_grad_norm: float = 1.0,
         weight_decay: float = 0.0,
@@ -643,7 +685,7 @@
 
         Args:
             training_file (str): File-ID of a file uploaded to the Together API
-            model (str): Name of the base model to run fine-tune job on
+            model (str, optional): Name of the base model to run fine-tune job on
             n_epochs (int, optional): Number of epochs for fine-tuning. Defaults to 1.
             validation file (str, optional): File ID of a file uploaded to the Together API for validation.
             n_evals (int, optional): Number of evaluation loops to run. Defaults to 0.
@@ -652,9 +694,11 @@
             batch_size (int, optional): Batch size for fine-tuning. Defaults to max.
             learning_rate (float, optional): Learning rate multiplier to use for training
                 Defaults to 0.00001.
+            lr_scheduler_type (Literal["linear", "cosine"]): Learning rate scheduler type. Defaults to "linear".
             min_lr_ratio (float, optional): Min learning rate ratio of the initial learning rate for
                 the learning rate scheduler. Defaults to 0.0.
-            warmup_ratio (float, optional): Warmup ratio for learning rate scheduler.
+            scheduler_num_cycles (float, optional): Number or fraction of cycles for the cosine learning rate scheduler. Defaults to 0.5.
+            warmup_ratio (float, optional): Warmup ratio for the learning rate scheduler.
             max_grad_norm (float, optional): Max gradient norm. Defaults to 1.0, set to 0 to disable.
             weight_decay (float, optional): Weight decay. Defaults to 0.0.
             lora (bool, optional): Whether to use LoRA adapters. Defaults to True.
@@ -693,12 +737,23 @@
             FinetuneResponse: Object containing information about fine-tuning job.
         """
 
+        if model is None and from_checkpoint is None:
+            raise ValueError("You must specify either a model or a checkpoint")
+
         requestor = api_requestor.APIRequestor(
             client=self._client,
        )
 
         if model_limits is None:
-            model_limits = await self.get_model_limits(model=model)
+            # mypy doesn't understand that model or from_checkpoint is not None
+            if model is not None:
+                model_name = model
+            elif from_checkpoint is not None:
+                model_name = from_checkpoint.split(":")[0]
+            else:
+                # this branch is unreachable, but mypy doesn't know that
+                pass
+            model_limits = await self.get_model_limits(model=model_name)
 
         finetune_request = createFinetuneRequest(
             model_limits=model_limits,
@@ -710,7 +765,9 @@
             n_checkpoints=n_checkpoints,
             batch_size=batch_size,
             learning_rate=learning_rate,
+            lr_scheduler_type=lr_scheduler_type,
             min_lr_ratio=min_lr_ratio,
+            scheduler_num_cycles=scheduler_num_cycles,
             warmup_ratio=warmup_ratio,
             max_grad_norm=max_grad_norm,
             weight_decay=weight_decay,
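
With `model` now optional, both clients accept `from_checkpoint` as the starting point, derive the identifier for the `get_model_limits` lookup by splitting on `:` (the suffix selects a checkpoint step), and raise a `ValueError` if neither, or both, is given. A sketch (the IDs are placeholders):

from together import Together

client = Together()

# Resume from step 100 of an earlier job; also passing model= would raise.
job = client.fine_tuning.create(
    training_file="file-0000000000",    # placeholder file ID
    from_checkpoint="ft-00000000:100",  # placeholder job ID plus step suffix
)
print(job.id)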
--- /dev/null
+++ together-1.5.2/src/together/resources/models.py
@@ -0,0 +1,134 @@
+from __future__ import annotations
+
+from typing import List
+
+from together.abstract import api_requestor
+from together.together_response import TogetherResponse
+from together.types import (
+    ModelObject,
+    TogetherClient,
+    TogetherRequest,
+)
+
+
+class ModelsBase:
+    def __init__(self, client: TogetherClient) -> None:
+        self._client = client
+
+    def _filter_dedicated_models(
+        self, models: List[ModelObject], dedicated_response: TogetherResponse
+    ) -> List[ModelObject]:
+        """
+        Filter models based on dedicated model response.
+
+        Args:
+            models (List[ModelObject]): List of all models
+            dedicated_response (TogetherResponse): Response from autoscale models endpoint
+
+        Returns:
+            List[ModelObject]: Filtered list of models
+        """
+        assert isinstance(dedicated_response.data, list)
+
+        # Create a set of dedicated model names for efficient lookup
+        dedicated_model_names = {model["name"] for model in dedicated_response.data}
+
+        # Filter models to only include those in dedicated_model_names
+        # Note: The model.id from ModelObject matches the name field in the autoscale response
+        return [model for model in models if model.id in dedicated_model_names]
+
+
+class Models(ModelsBase):
+    def list(
+        self,
+        dedicated: bool = False,
+    ) -> List[ModelObject]:
+        """
+        Method to return list of models on the API
+
+        Args:
+            dedicated (bool, optional): If True, returns only dedicated models. Defaults to False.
+
+        Returns:
+            List[ModelObject]: List of model objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = requestor.request(
+            options=TogetherRequest(
+                method="GET",
+                url="models",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        models = [ModelObject(**model) for model in response.data]
+
+        if dedicated:
+            # Get dedicated models
+            dedicated_response, _, _ = requestor.request(
+                options=TogetherRequest(
+                    method="GET",
+                    url="autoscale/models",
+                ),
+                stream=False,
+            )
+
+            models = self._filter_dedicated_models(models, dedicated_response)
+
+        models.sort(key=lambda x: x.id.lower())
+
+        return models
+
+
+class AsyncModels(ModelsBase):
+    async def list(
+        self,
+        dedicated: bool = False,
+    ) -> List[ModelObject]:
+        """
+        Async method to return list of models on API
+
+        Args:
+            dedicated (bool, optional): If True, returns only dedicated models. Defaults to False.
+
+        Returns:
+            List[ModelObject]: List of model objects
+        """
+        requestor = api_requestor.APIRequestor(
+            client=self._client,
+        )
+
+        response, _, _ = await requestor.arequest(
+            options=TogetherRequest(
+                method="GET",
+                url="models",
+            ),
+            stream=False,
+        )
+
+        assert isinstance(response, TogetherResponse)
+        assert isinstance(response.data, list)
+
+        models = [ModelObject(**model) for model in response.data]
+
+        if dedicated:
+            # Get dedicated models
+            dedicated_response, _, _ = await requestor.arequest(
+                options=TogetherRequest(
+                    method="GET",
+                    url="autoscale/models",
+                ),
+                stream=False,
+            )
+
+            models = self._filter_dedicated_models(models, dedicated_response)
+
+        models.sort(key=lambda x: x.id.lower())
+
+        return models
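
The new `ModelsBase._filter_dedicated_models` intersects the `/models` listing with the `/autoscale/models` payload by name, and the callers then sort case-insensitively. The rule is easy to restate on plain dicts; a self-contained sketch of just that logic, with invented sample data:

from typing import Any, Dict, List

def filter_dedicated(
    models: List[Dict[str, Any]], autoscale: List[Dict[str, Any]]
) -> List[Dict[str, Any]]:
    # Keep a model only when its id appears as a "name" in the autoscale
    # data, then sort case-insensitively, mirroring models.sort(...) above.
    dedicated_names = {entry["name"] for entry in autoscale}
    return sorted(
        (m for m in models if m["id"] in dedicated_names),
        key=lambda m: m["id"].lower(),
    )

print(filter_dedicated(
    [{"id": "org/model-a"}, {"id": "org/model-b"}],
    [{"name": "org/model-b"}],
))  # -> [{'id': 'org/model-b'}]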
--- together-1.4.5/src/together/types/__init__.py
+++ together-1.5.2/src/together/types/__init__.py
@@ -34,11 +34,14 @@ from together.types.finetune import (
     TrainingMethodDPO,
     TrainingMethodSFT,
     FinetuneCheckpoint,
+    FinetuneCosineLRScheduler,
+    FinetuneCosineLRSchedulerArgs,
     FinetuneDownloadResult,
+    FinetuneLinearLRScheduler,
     FinetuneLinearLRSchedulerArgs,
+    FinetuneLRScheduler,
     FinetuneList,
     FinetuneListEvents,
-    FinetuneLRScheduler,
     FinetuneRequest,
     FinetuneResponse,
     FinetuneTrainingLimits,
@@ -69,7 +72,10 @@ __all__ = [
     "FinetuneListEvents",
     "FinetuneDownloadResult",
     "FinetuneLRScheduler",
+    "FinetuneLinearLRScheduler",
     "FinetuneLinearLRSchedulerArgs",
+    "FinetuneCosineLRScheduler",
+    "FinetuneCosineLRSchedulerArgs",
     "FileRequest",
     "FileResponse",
     "FileList",
--- together-1.4.5/src/together/types/finetune.py
+++ together-1.5.2/src/together/types/finetune.py
@@ -1,9 +1,9 @@
 from __future__ import annotations
 
 from enum import Enum
-from typing import List, Literal
+from typing import List, Literal, Union
 
-from pydantic import StrictBool, Field, validator, field_validator
+from pydantic import StrictBool, Field, validator, field_validator, ValidationInfo
 
 from together.types.abstract import BaseModel
 from together.types.common import (
@@ -176,7 +176,7 @@ class FinetuneRequest(BaseModel):
     # training learning rate
     learning_rate: float
     # learning rate scheduler type and args
-    lr_scheduler: FinetuneLRScheduler | None = None
+    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float
     # max gradient norm
@@ -239,7 +239,7 @@ class FinetuneResponse(BaseModel):
     # training learning rate
     learning_rate: float | None = None
     # learning rate scheduler type and args
-    lr_scheduler: FinetuneLRScheduler | None = None
+    lr_scheduler: FinetuneLinearLRScheduler | FinetuneCosineLRScheduler | None = None
     # learning rate warmup ratio
     warmup_ratio: float | None = None
     # max gradient norm
@@ -345,13 +345,27 @@ class FinetuneTrainingLimits(BaseModel):
     lora_training: FinetuneLoraTrainingLimits | None = None
 
 
+class FinetuneLinearLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
+
+
+class FinetuneCosineLRSchedulerArgs(BaseModel):
+    min_lr_ratio: float | None = 0.0
+    num_cycles: float | None = 0.5
+
+
 class FinetuneLRScheduler(BaseModel):
     lr_scheduler_type: str
-    lr_scheduler_args: FinetuneLinearLRSchedulerArgs | None = None
 
 
-class FinetuneLinearLRSchedulerArgs(BaseModel):
-    min_lr_ratio: float | None = 0.0
+class FinetuneLinearLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal["linear"] = "linear"
+    lr_scheduler: FinetuneLinearLRSchedulerArgs | None = None
+
+
+class FinetuneCosineLRScheduler(FinetuneLRScheduler):
+    lr_scheduler_type: Literal["cosine"] = "cosine"
+    lr_scheduler: FinetuneCosineLRSchedulerArgs | None = None
 
 
 class FinetuneCheckpoint(BaseModel):
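
The scheduler configuration is now a small class hierarchy: `FinetuneLRScheduler` keeps only the type tag, while the linear and cosine subclasses pin `lr_scheduler_type` with a `Literal` and carry their own args model, which is what allows `FinetuneRequest.lr_scheduler` to be typed as a union above. A sketch of validating a cosine payload, assuming pydantic v2 (consistent with the `field_validator` import) and the field names exactly as defined in this hunk:

from together.types import FinetuneCosineLRScheduler

sched = FinetuneCosineLRScheduler.model_validate(
    {
        "lr_scheduler_type": "cosine",
        "lr_scheduler": {"min_lr_ratio": 0.1, "num_cycles": 0.5},
    }
)
print(sched.lr_scheduler.num_cycles)  # 0.5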
--- together-1.4.5/src/together/resources/models.py
+++ /dev/null
@@ -1,75 +0,0 @@
-from __future__ import annotations
-
-from typing import List
-
-from together.abstract import api_requestor
-from together.together_response import TogetherResponse
-from together.types import (
-    ModelObject,
-    TogetherClient,
-    TogetherRequest,
-)
-
-
-class Models:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    def list(
-        self,
-    ) -> List[ModelObject]:
-        """
-        Method to return list of models on the API
-
-        Returns:
-            List[ModelObject]: List of model objects
-        """
-
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-
-        response, _, _ = requestor.request(
-            options=TogetherRequest(
-                method="GET",
-                url="models",
-            ),
-            stream=False,
-        )
-
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, list)
-
-        return [ModelObject(**model) for model in response.data]
-
-
-class AsyncModels:
-    def __init__(self, client: TogetherClient) -> None:
-        self._client = client
-
-    async def list(
-        self,
-    ) -> List[ModelObject]:
-        """
-        Async method to return list of models on API
-
-        Returns:
-            List[ModelObject]: List of model objects
-        """
-
-        requestor = api_requestor.APIRequestor(
-            client=self._client,
-        )
-
-        response, _, _ = await requestor.arequest(
-            options=TogetherRequest(
-                method="GET",
-                url="models",
-            ),
-            stream=False,
-        )
-
-        assert isinstance(response, TogetherResponse)
-        assert isinstance(response.data, list)
-
-        return [ModelObject(**model) for model in response.data]