together 1.5.21__py3-none-any.whl → 1.5.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,18 +89,10 @@ def create_finetune_request(
 
     model_or_checkpoint = model or from_checkpoint
 
-    if batch_size == "max":
-        log_warn_once(
-            "Starting from together>=1.3.0, "
-            "the default batch size is set to the maximum allowed value for each model."
-        )
     if warmup_ratio is None:
         warmup_ratio = 0.0
 
     training_type: TrainingType = FullTrainingType()
-    max_batch_size: int = 0
-    max_batch_size_dpo: int = 0
-    min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
             raise ValueError(
@@ -133,28 +125,23 @@ def create_finetune_request(
         min_batch_size = model_limits.full_training.min_batch_size
         max_batch_size_dpo = model_limits.full_training.max_batch_size_dpo
 
-    if batch_size == "max":
-        if training_method == "dpo":
-            batch_size = max_batch_size_dpo
-        else:
-            batch_size = max_batch_size
+    if batch_size != "max":
+        if training_method == "sft":
+            if batch_size > max_batch_size:
+                raise ValueError(
+                    f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size}."
+                )
+        elif training_method == "dpo":
+            if batch_size > max_batch_size_dpo:
+                raise ValueError(
+                    f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size_dpo}."
+                )
 
-    if training_method == "sft":
-        if batch_size > max_batch_size:
-            raise ValueError(
-                f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size}."
-            )
-    elif training_method == "dpo":
-        if batch_size > max_batch_size_dpo:
+        if batch_size < min_batch_size:
             raise ValueError(
-                f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size_dpo}."
+                f"Requested batch size of {batch_size} is lower that the minimum allowed value of {min_batch_size}."
             )
 
-    if batch_size < min_batch_size:
-        raise ValueError(
-            f"Requested batch size of {batch_size} is lower that the minimum allowed value of {min_batch_size}."
-        )
-
     if warmup_ratio > 1 or warmup_ratio < 0:
         raise ValueError(f"Warmup ratio should be between 0 and 1 (got {warmup_ratio})")
 
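With this change, `create_finetune_request` no longer resolves a `"max"` batch size client-side: the literal string is forwarded in the request (see the widened `FinetuneRequest.batch_size` type later in this diff), and only explicit integer values are checked against the model's minimum and maximum batch sizes. A minimal sketch of the calling side, assuming the public `client.fine_tuning.create(...)` wrapper and using placeholder model and file IDs:

```python
from together import Together

client = Together()

# Placeholder model name and training-file ID, for illustration only.
# batch_size="max" is forwarded as-is; an integer outside the model's
# allowed range now raises ValueError before the request is sent.
job = client.fine_tuning.create(
    training_file="file-placeholder-id",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    batch_size="max",
)
print(job.id)
```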
@@ -61,6 +61,19 @@ from together.types.images import ImageRequest, ImageResponse
 from together.types.models import ModelObject
 from together.types.rerank import RerankRequest, RerankResponse
 from together.types.batch import BatchJob, BatchJobStatus, BatchEndpoint
+from together.types.evaluation import (
+    EvaluationType,
+    EvaluationStatus,
+    JudgeModelConfig,
+    ModelRequest,
+    ClassifyParameters,
+    ScoreParameters,
+    CompareParameters,
+    EvaluationRequest,
+    EvaluationCreateResponse,
+    EvaluationJob,
+    EvaluationStatusResponse,
+)
 
 
 __all__ = [
@@ -124,4 +137,15 @@ __all__ = [
     "BatchJob",
     "BatchJobStatus",
     "BatchEndpoint",
+    "EvaluationType",
+    "EvaluationStatus",
+    "JudgeModelConfig",
+    "ModelRequest",
+    "ClassifyParameters",
+    "ScoreParameters",
+    "CompareParameters",
+    "EvaluationRequest",
+    "EvaluationCreateResponse",
+    "EvaluationJob",
+    "EvaluationStatusResponse",
 ]
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, Field
+
+
+class EvaluationType(str, Enum):
+    CLASSIFY = "classify"
+    SCORE = "score"
+    COMPARE = "compare"
+
+
+class EvaluationStatus(str, Enum):
+    PENDING = "pending"
+    QUEUED = "queued"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    ERROR = "error"
+    USER_ERROR = "user_error"
+
+
+class JudgeModelConfig(BaseModel):
+    model_name: str
+    system_template: str
+
+
+class ModelRequest(BaseModel):
+    model_name: str
+    max_tokens: int
+    temperature: float
+    system_template: str
+    input_template: str
+
+
+class ClassifyParameters(BaseModel):
+    judge: JudgeModelConfig
+    labels: List[str]
+    pass_labels: List[str]
+    model_to_evaluate: Optional[Union[str, ModelRequest]] = None
+    input_data_file_path: str
+
+
+class ScoreParameters(BaseModel):
+    judge: JudgeModelConfig
+    min_score: float
+    max_score: float
+    pass_threshold: float
+    model_to_evaluate: Optional[Union[str, ModelRequest]] = None
+    input_data_file_path: str
+
+
+class CompareParameters(BaseModel):
+    judge: JudgeModelConfig
+    model_a: Optional[Union[str, ModelRequest]] = None
+    model_b: Optional[Union[str, ModelRequest]] = None
+    input_data_file_path: str
+
+
+class EvaluationRequest(BaseModel):
+    type: EvaluationType
+    parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters]
+
+
+class EvaluationCreateResponse(BaseModel):
+    workflow_id: str
+    status: EvaluationStatus
+
+
+class EvaluationJob(BaseModel):
+    workflow_id: str = Field(alias="workflow_id")
+    type: Optional[EvaluationType] = None
+    status: EvaluationStatus
+    results: Optional[Dict[str, Any]] = None
+    parameters: Optional[Dict[str, Any]] = None
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+
+    class Config:
+        populate_by_name = True
+
+
+class EvaluationStatusResponse(BaseModel):
+    status: EvaluationStatus
+    results: Optional[Dict[str, Any]] = None
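Since these are plain pydantic models, an evaluation payload can be assembled directly from them. A minimal sketch for a classify-style evaluation; the judge model name, templates, labels, and file reference below are placeholders, not SDK defaults:

```python
from together.types.evaluation import (
    ClassifyParameters,
    EvaluationRequest,
    EvaluationType,
    JudgeModelConfig,
)

# Placeholder judge model, templates, labels, and file reference.
request = EvaluationRequest(
    type=EvaluationType.CLASSIFY,
    parameters=ClassifyParameters(
        judge=JudgeModelConfig(
            model_name="placeholder-judge-model",
            system_template="Classify the response as helpful or unhelpful.",
        ),
        labels=["helpful", "unhelpful"],
        pass_labels=["helpful"],
        model_to_evaluate="placeholder-model-to-evaluate",
        input_data_file_path="file-placeholder-id",
    ),
)
print(request.type)  # EvaluationType.CLASSIFY
```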
together/types/files.py CHANGED
@@ -14,11 +14,13 @@ from together.types.common import (
 class FilePurpose(str, Enum):
     FineTune = "fine-tune"
     BatchAPI = "batch-api"
+    Eval = "eval"
 
 
 class FileType(str, Enum):
     jsonl = "jsonl"
     parquet = "parquet"
+    csv = "csv"
 
 
 class FileRequest(BaseModel):
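Evaluation inputs get their own file purpose, and CSV joins the recognized file types. A hedged sketch of an upload using the new purpose, assuming the same `client.files.upload(...)` call shown in the README snippet at the end of this diff; the filename is a placeholder:

```python
from together import Together

client = Together()

# Sketch only: assumes files.upload accepts the new "eval" purpose the same
# way it accepts "batch-api"; the CSV filename is a placeholder.
eval_file = client.files.upload(file="eval_inputs.csv", purpose="eval")
print(eval_file.id)
```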
@@ -195,7 +195,7 @@ class FinetuneRequest(BaseModel):
     # number of evaluation loops to run
     n_evals: int | None = None
     # training batch size
-    batch_size: int | None = None
+    batch_size: int | Literal["max"] | None = None
     # up to 40 character suffix for output model name
     suffix: str | None = None
     # weights & biases api key
together/utils/files.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import json
 import os
+import csv
 from pathlib import Path
 from traceback import format_exc
 from typing import Any, Dict, List
@@ -17,6 +18,7 @@ from together.constants import (
     POSSIBLE_ROLES_CONVERSATION,
     DatasetFormat,
 )
+from together.types import FilePurpose
 
 
 class InvalidFileFormatError(ValueError):
@@ -36,6 +38,7 @@ class InvalidFileFormatError(ValueError):
 
 def check_file(
     file: Path | str,
+    purpose: FilePurpose | str = FilePurpose.FineTune,
 ) -> Dict[str, Any]:
     if not isinstance(file, Path):
         file = Path(file)
@@ -52,6 +55,7 @@
         "has_min_samples": None,
         "num_samples": None,
         "load_json": None,
+        "load_csv": None,
     }
 
     if not file.is_file():
@@ -79,10 +83,13 @@
     data_report_dict = {}
     if file.suffix == ".jsonl":
         report_dict["filetype"] = "jsonl"
-        data_report_dict = _check_jsonl(file)
+        data_report_dict = _check_jsonl(file, purpose)
     elif file.suffix == ".parquet":
         report_dict["filetype"] = "parquet"
-        data_report_dict = _check_parquet(file)
+        data_report_dict = _check_parquet(file, purpose)
+    elif file.suffix == ".csv":
+        report_dict["filetype"] = "csv"
+        data_report_dict = _check_csv(file, purpose)
     else:
         report_dict["filetype"] = (
             f"Unknown extension of file {file}. "
@@ -229,9 +236,15 @@ def validate_preference_openai(example: Dict[str, Any], idx: int = 0) -> None:
     validate_messages(example["non_preferred_output"], idx)
 
 
-def _check_jsonl(file: Path) -> Dict[str, Any]:
+def _check_utf8(file: Path) -> Dict[str, Any]:
+    """Check if the file is UTF-8 encoded.
+
+    Args:
+        file (Path): Path to the file to check.
+    Returns:
+        Dict[str, Any]: A dictionary with the results of the check.
+    """
     report_dict: Dict[str, Any] = {}
-    # Check that the file is UTF-8 encoded. If not report where the error occurs.
     try:
         with file.open(encoding="utf-8") as f:
             f.read()
@@ -240,6 +253,99 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
         report_dict["utf8"] = False
         report_dict["message"] = f"File is not UTF-8 encoded. Error raised: {e}."
         report_dict["is_check_passed"] = False
+        return report_dict
+
+
+def _check_samples_count(
+    file: Path, report_dict: Dict[str, Any], idx: int
+) -> Dict[str, Any]:
+    if idx + 1 < MIN_SAMPLES:
+        report_dict["has_min_samples"] = False
+        report_dict["message"] = (
+            f"Processing {file} resulted in only {idx + 1} samples. "
+            f"Our minimum is {MIN_SAMPLES} samples. "
+        )
+        report_dict["is_check_passed"] = False
+    else:
+        report_dict["num_samples"] = idx + 1
+        report_dict["has_min_samples"] = True
+
+    return report_dict
+
+
+def _check_csv(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
+    """Check if the file is a valid CSV file.
+
+    Args:
+        file (Path): Path to the file to check.
+        purpose (FilePurpose | str): Purpose of the file, used to determine if the file should be checked for specific columns.
+
+    Returns:
+        Dict[str, Any]: A dictionary with the results of the check.
+    """
+    report_dict: Dict[str, Any] = {}
+    if purpose != FilePurpose.Eval:
+        report_dict["is_check_passed"] = False
+        report_dict["message"] = (
+            f"CSV files are not supported for {purpose}. "
+            "Only JSONL and Parquet files are supported."
+        )
+        return report_dict
+
+    report_dict.update(_check_utf8(file))
+
+    if not report_dict["utf8"]:
+        return report_dict
+
+    with file.open() as f:
+        reader = csv.DictReader(f)
+        if not reader.fieldnames:
+            report_dict["message"] = "CSV file is empty or has no header."
+            report_dict["is_check_passed"] = False
+            return report_dict
+        idx = -1
+
+        try:
+            # for loop to iterate through the CSV rows
+            for idx, item in enumerate(reader):
+                if None in item.keys() or None in item.values():
+                    raise InvalidFileFormatError(
+                        message=f"CSV file is malformed or the number of columns found on line {idx + 1} is inconsistent with the header",
+                        line_number=idx + 1,
+                        error_source="format",
+                    )
+
+            report_dict.update(_check_samples_count(file, report_dict, idx))
+            report_dict["load_csv"] = True
+
+        except InvalidFileFormatError as e:
+            report_dict["load_csv"] = False
+            report_dict["is_check_passed"] = False
+            report_dict["message"] = e.message
+            if e.line_number is not None:
+                report_dict["line_number"] = e.line_number
+            if e.error_source is not None:
+                report_dict[e.error_source] = False
+        except ValueError:
+            report_dict["load_csv"] = False
+            if idx < 0:
+                report_dict["message"] = (
+                    "Unable to decode file. "
+                    "File may be empty or in an unsupported format. "
+                )
+            else:
+                report_dict["message"] = (
+                    f"Error parsing the CSV file. Unexpected format on line {idx + 1}."
+                )
+            report_dict["is_check_passed"] = False
+
+    return report_dict
+
+
+def _check_jsonl(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
+    report_dict: Dict[str, Any] = {}
+    report_dict.update(_check_utf8(file))
+    if not report_dict["utf8"]:
         return report_dict
 
     dataset_format = None
@@ -259,84 +365,75 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
                         line_number=idx + 1,
                         error_source="line_type",
                     )
-
-                current_format = None
-                for possible_format in JSONL_REQUIRED_COLUMNS_MAP:
-                    if all(
-                        column in json_line
-                        for column in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
-                    ):
-                        if current_format is None:
-                            current_format = possible_format
-                        elif current_format != possible_format:
-                            raise InvalidFileFormatError(
-                                message="Found multiple dataset formats in the input file. "
-                                f"Got {current_format} and {possible_format} on line {idx + 1}.",
-                                line_number=idx + 1,
-                                error_source="format",
-                            )
-
-                        # Check that there are no extra columns
-                        for column in json_line:
-                            if (
-                                column
-                                not in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
-                            ):
+                # In evals, we don't check the format of the dataset.
+                if purpose != FilePurpose.Eval:
+                    current_format = None
+                    for possible_format in JSONL_REQUIRED_COLUMNS_MAP:
+                        if all(
+                            column in json_line
+                            for column in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
+                        ):
+                            if current_format is None:
+                                current_format = possible_format
+                            elif current_format != possible_format:
                                 raise InvalidFileFormatError(
-                                    message=f'Found extra column "{column}" in the line {idx + 1}.',
+                                    message="Found multiple dataset formats in the input file. "
+                                    f"Got {current_format} and {possible_format} on line {idx + 1}.",
                                     line_number=idx + 1,
                                     error_source="format",
                                 )
 
-                if current_format is None:
-                    raise InvalidFileFormatError(
-                        message=(
-                            f"Error parsing file. Could not detect a format for the line {idx + 1} with the columns:\n"
-                            f"{json_line.keys()}"
-                        ),
-                        line_number=idx + 1,
-                        error_source="format",
-                    )
-                if current_format == DatasetFormat.PREFERENCE_OPENAI:
-                    validate_preference_openai(json_line, idx)
-                elif current_format == DatasetFormat.CONVERSATION:
-                    message_column = JSONL_REQUIRED_COLUMNS_MAP[
-                        DatasetFormat.CONVERSATION
-                    ][0]
-                    validate_messages(json_line[message_column], idx)
-                else:
-                    for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]:
-                        if not isinstance(json_line[column], str):
-                            raise InvalidFileFormatError(
-                                message=f'Invalid value type for "{column}" key on line {idx + 1}. '
-                                f"Expected string. Found {type(json_line[column])}.",
-                                line_number=idx + 1,
-                                error_source="key_value",
-                            )
-
-                if dataset_format is None:
-                    dataset_format = current_format
-                elif current_format is not None:
-                    if current_format != dataset_format:
+                            # Check that there are no extra columns
+                            for column in json_line:
+                                if (
+                                    column
+                                    not in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
+                                ):
+                                    raise InvalidFileFormatError(
+                                        message=f'Found extra column "{column}" in the line {idx + 1}.',
+                                        line_number=idx + 1,
+                                        error_source="format",
+                                    )
+
+                    if current_format is None:
                         raise InvalidFileFormatError(
-                            message="All samples in the dataset must have the same dataset format. "
-                            f"Got {dataset_format} for the first line and {current_format} "
-                            f"for the line {idx + 1}.",
+                            message=(
+                                f"Error parsing file. Could not detect a format for the line {idx + 1} with the columns:\n"
+                                f"{json_line.keys()}"
+                            ),
                             line_number=idx + 1,
                             error_source="format",
                         )
+                    if current_format == DatasetFormat.PREFERENCE_OPENAI:
+                        validate_preference_openai(json_line, idx)
+                    elif current_format == DatasetFormat.CONVERSATION:
+                        message_column = JSONL_REQUIRED_COLUMNS_MAP[
+                            DatasetFormat.CONVERSATION
+                        ][0]
+                        validate_messages(json_line[message_column], idx)
+                    else:
+                        for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]:
+                            if not isinstance(json_line[column], str):
+                                raise InvalidFileFormatError(
+                                    message=f'Invalid value type for "{column}" key on line {idx + 1}. '
+                                    f"Expected string. Found {type(json_line[column])}.",
+                                    line_number=idx + 1,
+                                    error_source="key_value",
+                                )
 
-            if idx + 1 < MIN_SAMPLES:
-                report_dict["has_min_samples"] = False
-                report_dict["message"] = (
-                    f"Processing {file} resulted in only {idx + 1} samples. "
-                    f"Our minimum is {MIN_SAMPLES} samples. "
-                )
-                report_dict["is_check_passed"] = False
-            else:
-                report_dict["num_samples"] = idx + 1
-                report_dict["has_min_samples"] = True
-                report_dict["is_check_passed"] = True
+                    if dataset_format is None:
+                        dataset_format = current_format
+                    elif current_format is not None:
+                        if current_format != dataset_format:
+                            raise InvalidFileFormatError(
+                                message="All samples in the dataset must have the same dataset format. "
+                                f"Got {dataset_format} for the first line and {current_format} "
+                                f"for the line {idx + 1}.",
+                                line_number=idx + 1,
+                                error_source="format",
+                            )
+
+            report_dict.update(_check_samples_count(file, report_dict, idx))
 
         report_dict["load_json"] = True
 
@@ -370,7 +467,7 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
     return report_dict
 
 
-def _check_parquet(file: Path) -> Dict[str, Any]:
+def _check_parquet(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
     try:
         # Pyarrow is optional as it's large (~80MB) and isn't compatible with older systems.
         from pyarrow import ArrowInvalid, parquet
@@ -380,6 +477,13 @@
         )
 
     report_dict: Dict[str, Any] = {}
+    if purpose == FilePurpose.Eval:
+        report_dict["is_check_passed"] = False
+        report_dict["message"] = (
+            f"Parquet files are not supported for {purpose}. "
+            "Only JSONL and CSV files are supported."
+        )
+        return report_dict
 
     try:
         table = parquet.read_table(str(file), memory_map=True)
@@ -399,6 +503,7 @@
         report_dict["is_check_passed"] = False
         return report_dict
 
+    # Don't check for eval
     for column_name in column_names:
         if column_name not in PARQUET_EXPECTED_COLUMNS:
             report_dict["load_parquet"] = (
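Taken together, `check_file` now threads a `purpose` argument down to the per-format checkers: CSV files are accepted only for the eval purpose, Parquet is rejected for it, and eval JSONL skips the fine-tuning format checks. A minimal sketch of the new validation path, assuming a local UTF-8 CSV with a header row and enough rows to satisfy the minimum-sample check (the path is a placeholder):

```python
from together.types.files import FilePurpose
from together.utils.files import check_file

# Placeholder path; any consistent, UTF-8 CSV with a header row works here.
report = check_file("eval_inputs.csv", purpose=FilePurpose.Eval)

# Keys populated by the new CSV path in this release:
print(report["filetype"])     # "csv"
print(report["load_csv"])     # True when every row matches the header
print(report["num_samples"])  # row count once the minimum-sample check passes
```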
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.5.21
+Version: 1.5.23
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
@@ -421,6 +421,33 @@ for model in models:
     print(model)
 ```
 
+### Batch Inference
+
+The batch API allows you to submit larger inference jobs for completion with a 24 hour turn-around time, below is an example. To learn more refer to the [docs here](https://docs.together.ai/docs/batch-inference).
+
+```python
+from together import Together
+
+client = Together()
+
+# Upload the batch file
+batch_file = client.files.upload(file="simpleqa_batch_student.jsonl", purpose="batch-api")
+
+# Create the batch job
+batch = client.batches.create_batch(file_id=batch_file.id, endpoint="/v1/chat/completions")
+
+# Monitor the batch status
+batch_stat = client.batches.get_batch(batch.id)
+
+# List all batches - contains other batches as well
+client.batches.list_batches()
+
+# Download the file content if job completed
+if batch_stat.status == 'COMPLETED':
+    output_response = client.files.retrieve_content(id=batch_stat.output_file_id,
+                                                    output="simpleqa_v3_output.jsonl")
+```
+
 ## Usage – CLI
 
 ### Chat Completions