together 1.5.21__py3-none-any.whl → 1.5.23__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -89,18 +89,10 @@ def create_finetune_request(
 
     model_or_checkpoint = model or from_checkpoint
 
-    if batch_size == "max":
-        log_warn_once(
-            "Starting from together>=1.3.0, "
-            "the default batch size is set to the maximum allowed value for each model."
-        )
     if warmup_ratio is None:
         warmup_ratio = 0.0
 
     training_type: TrainingType = FullTrainingType()
-    max_batch_size: int = 0
-    max_batch_size_dpo: int = 0
-    min_batch_size: int = 0
     if lora:
         if model_limits.lora_training is None:
             raise ValueError(
@@ -133,28 +125,23 @@ def create_finetune_request(
         min_batch_size = model_limits.full_training.min_batch_size
         max_batch_size_dpo = model_limits.full_training.max_batch_size_dpo
 
-    if batch_size == "max":
-        if training_method == "dpo":
-            batch_size = max_batch_size_dpo
-        else:
-            batch_size = max_batch_size
+    if batch_size != "max":
+        if training_method == "sft":
+            if batch_size > max_batch_size:
+                raise ValueError(
+                    f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size}."
+                )
+        elif training_method == "dpo":
+            if batch_size > max_batch_size_dpo:
+                raise ValueError(
+                    f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size_dpo}."
+                )
 
-    if training_method == "sft":
-        if batch_size > max_batch_size:
-            raise ValueError(
-                f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size}."
-            )
-    elif training_method == "dpo":
-        if batch_size > max_batch_size_dpo:
+        if batch_size < min_batch_size:
             raise ValueError(
-                f"Requested batch size of {batch_size} is higher that the maximum allowed value of {max_batch_size_dpo}."
+                f"Requested batch size of {batch_size} is lower that the minimum allowed value of {min_batch_size}."
             )
 
-    if batch_size < min_batch_size:
-        raise ValueError(
-            f"Requested batch size of {batch_size} is lower that the minimum allowed value of {min_batch_size}."
-        )
-
     if warmup_ratio > 1 or warmup_ratio < 0:
         raise ValueError(f"Warmup ratio should be between 0 and 1 (got {warmup_ratio})")
 
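With this change, `create_finetune_request` no longer resolves a `"max"` batch size client-side: the literal string is forwarded in the request (see the widened `FinetuneRequest.batch_size` type later in this diff), and only explicit integer values are checked against the model's minimum and maximum batch sizes. A minimal sketch of the calling side, assuming the public `client.fine_tuning.create(...)` wrapper and using placeholder model and file IDs:

```python
from together import Together

client = Together()

# Placeholder model name and training-file ID, for illustration only.
# batch_size="max" is forwarded as-is; an integer outside the model's
# allowed range now raises ValueError before the request is sent.
job = client.fine_tuning.create(
    training_file="file-placeholder-id",
    model="meta-llama/Meta-Llama-3.1-8B-Instruct-Reference",
    batch_size="max",
)
print(job.id)
```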
@@ -61,6 +61,19 @@ from together.types.images import ImageRequest, ImageResponse
 from together.types.models import ModelObject
 from together.types.rerank import RerankRequest, RerankResponse
 from together.types.batch import BatchJob, BatchJobStatus, BatchEndpoint
+from together.types.evaluation import (
+    EvaluationType,
+    EvaluationStatus,
+    JudgeModelConfig,
+    ModelRequest,
+    ClassifyParameters,
+    ScoreParameters,
+    CompareParameters,
+    EvaluationRequest,
+    EvaluationCreateResponse,
+    EvaluationJob,
+    EvaluationStatusResponse,
+)
 
 
 __all__ = [
@@ -124,4 +137,15 @@ __all__ = [
     "BatchJob",
     "BatchJobStatus",
     "BatchEndpoint",
+    "EvaluationType",
+    "EvaluationStatus",
+    "JudgeModelConfig",
+    "ModelRequest",
+    "ClassifyParameters",
+    "ScoreParameters",
+    "CompareParameters",
+    "EvaluationRequest",
+    "EvaluationCreateResponse",
+    "EvaluationJob",
+    "EvaluationStatusResponse",
 ]
@@ -0,0 +1,87 @@
+from __future__ import annotations
+
+from datetime import datetime
+from enum import Enum
+from typing import Any, Dict, List, Optional, Union
+
+from pydantic import BaseModel, Field
+
+
+class EvaluationType(str, Enum):
+    CLASSIFY = "classify"
+    SCORE = "score"
+    COMPARE = "compare"
+
+
+class EvaluationStatus(str, Enum):
+    PENDING = "pending"
+    QUEUED = "queued"
+    RUNNING = "running"
+    COMPLETED = "completed"
+    ERROR = "error"
+    USER_ERROR = "user_error"
+
+
+class JudgeModelConfig(BaseModel):
+    model_name: str
+    system_template: str
+
+
+class ModelRequest(BaseModel):
+    model_name: str
+    max_tokens: int
+    temperature: float
+    system_template: str
+    input_template: str
+
+
+class ClassifyParameters(BaseModel):
+    judge: JudgeModelConfig
+    labels: List[str]
+    pass_labels: List[str]
+    model_to_evaluate: Optional[Union[str, ModelRequest]] = None
+    input_data_file_path: str
+
+
+class ScoreParameters(BaseModel):
+    judge: JudgeModelConfig
+    min_score: float
+    max_score: float
+    pass_threshold: float
+    model_to_evaluate: Optional[Union[str, ModelRequest]] = None
+    input_data_file_path: str
+
+
+class CompareParameters(BaseModel):
+    judge: JudgeModelConfig
+    model_a: Optional[Union[str, ModelRequest]] = None
+    model_b: Optional[Union[str, ModelRequest]] = None
+    input_data_file_path: str
+
+
+class EvaluationRequest(BaseModel):
+    type: EvaluationType
+    parameters: Union[ClassifyParameters, ScoreParameters, CompareParameters]
+
+
+class EvaluationCreateResponse(BaseModel):
+    workflow_id: str
+    status: EvaluationStatus
+
+
+class EvaluationJob(BaseModel):
+    workflow_id: str = Field(alias="workflow_id")
+    type: Optional[EvaluationType] = None
+    status: EvaluationStatus
+    results: Optional[Dict[str, Any]] = None
+    parameters: Optional[Dict[str, Any]] = None
+    created_at: Optional[datetime] = None
+    updated_at: Optional[datetime] = None
+
+    class Config:
+        populate_by_name = True
+
+
+class EvaluationStatusResponse(BaseModel):
+    status: EvaluationStatus
+    results: Optional[Dict[str, Any]] = None
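Since these are plain pydantic models, an evaluation payload can be assembled directly from them. A minimal sketch for a classify-style evaluation; the judge model name, templates, labels, and file reference below are placeholders, not SDK defaults:

```python
from together.types.evaluation import (
    ClassifyParameters,
    EvaluationRequest,
    EvaluationType,
    JudgeModelConfig,
)

# Placeholder judge model, templates, labels, and file reference.
request = EvaluationRequest(
    type=EvaluationType.CLASSIFY,
    parameters=ClassifyParameters(
        judge=JudgeModelConfig(
            model_name="placeholder-judge-model",
            system_template="Classify the response as helpful or unhelpful.",
        ),
        labels=["helpful", "unhelpful"],
        pass_labels=["helpful"],
        model_to_evaluate="placeholder-model-to-evaluate",
        input_data_file_path="file-placeholder-id",
    ),
)
print(request.type)  # EvaluationType.CLASSIFY
```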
together/types/files.py CHANGED
@@ -14,11 +14,13 @@ from together.types.common import (
 class FilePurpose(str, Enum):
     FineTune = "fine-tune"
     BatchAPI = "batch-api"
+    Eval = "eval"
 
 
 class FileType(str, Enum):
     jsonl = "jsonl"
     parquet = "parquet"
+    csv = "csv"
 
 
 class FileRequest(BaseModel):
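Evaluation inputs get their own file purpose, and CSV joins the recognized file types. A hedged sketch of an upload using the new purpose, assuming the same `client.files.upload(...)` call shown in the README snippet at the end of this diff; the filename is a placeholder:

```python
from together import Together

client = Together()

# Sketch only: assumes files.upload accepts the new "eval" purpose the same
# way it accepts "batch-api"; the CSV filename is a placeholder.
eval_file = client.files.upload(file="eval_inputs.csv", purpose="eval")
print(eval_file.id)
```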
@@ -195,7 +195,7 @@ class FinetuneRequest(BaseModel):
     # number of evaluation loops to run
     n_evals: int | None = None
     # training batch size
-    batch_size: int | None = None
+    batch_size: int | Literal["max"] | None = None
     # up to 40 character suffix for output model name
     suffix: str | None = None
     # weights & biases api key
together/utils/files.py CHANGED
@@ -2,6 +2,7 @@ from __future__ import annotations
 
 import json
 import os
+import csv
 from pathlib import Path
 from traceback import format_exc
 from typing import Any, Dict, List
@@ -17,6 +18,7 @@ from together.constants import (
     POSSIBLE_ROLES_CONVERSATION,
     DatasetFormat,
 )
+from together.types import FilePurpose
 
 
 class InvalidFileFormatError(ValueError):
@@ -36,6 +38,7 @@ class InvalidFileFormatError(ValueError):
 
 def check_file(
     file: Path | str,
+    purpose: FilePurpose | str = FilePurpose.FineTune,
 ) -> Dict[str, Any]:
     if not isinstance(file, Path):
         file = Path(file)
@@ -52,6 +55,7 @@
         "has_min_samples": None,
         "num_samples": None,
         "load_json": None,
+        "load_csv": None,
     }
 
     if not file.is_file():
@@ -79,10 +83,13 @@
     data_report_dict = {}
     if file.suffix == ".jsonl":
         report_dict["filetype"] = "jsonl"
-        data_report_dict = _check_jsonl(file)
+        data_report_dict = _check_jsonl(file, purpose)
     elif file.suffix == ".parquet":
         report_dict["filetype"] = "parquet"
-        data_report_dict = _check_parquet(file)
+        data_report_dict = _check_parquet(file, purpose)
+    elif file.suffix == ".csv":
+        report_dict["filetype"] = "csv"
+        data_report_dict = _check_csv(file, purpose)
     else:
         report_dict["filetype"] = (
             f"Unknown extension of file {file}. "
@@ -229,9 +236,15 @@ def validate_preference_openai(example: Dict[str, Any], idx: int = 0) -> None:
     validate_messages(example["non_preferred_output"], idx)
 
 
-def _check_jsonl(file: Path) -> Dict[str, Any]:
+def _check_utf8(file: Path) -> Dict[str, Any]:
+    """Check if the file is UTF-8 encoded.
+
+    Args:
+        file (Path): Path to the file to check.
+    Returns:
+        Dict[str, Any]: A dictionary with the results of the check.
+    """
     report_dict: Dict[str, Any] = {}
-    # Check that the file is UTF-8 encoded. If not report where the error occurs.
     try:
         with file.open(encoding="utf-8") as f:
             f.read()
@@ -240,6 +253,99 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
         report_dict["utf8"] = False
         report_dict["message"] = f"File is not UTF-8 encoded. Error raised: {e}."
         report_dict["is_check_passed"] = False
+        return report_dict
+
+
+def _check_samples_count(
+    file: Path, report_dict: Dict[str, Any], idx: int
+) -> Dict[str, Any]:
+    if idx + 1 < MIN_SAMPLES:
+        report_dict["has_min_samples"] = False
+        report_dict["message"] = (
+            f"Processing {file} resulted in only {idx + 1} samples. "
+            f"Our minimum is {MIN_SAMPLES} samples. "
+        )
+        report_dict["is_check_passed"] = False
+    else:
+        report_dict["num_samples"] = idx + 1
+        report_dict["has_min_samples"] = True
+
+    return report_dict
+
+
+def _check_csv(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
+    """Check if the file is a valid CSV file.
+
+    Args:
+        file (Path): Path to the file to check.
+        purpose (FilePurpose | str): Purpose of the file, used to determine if the file should be checked for specific columns.
+
+    Returns:
+        Dict[str, Any]: A dictionary with the results of the check.
+    """
+    report_dict: Dict[str, Any] = {}
+    if purpose != FilePurpose.Eval:
+        report_dict["is_check_passed"] = False
+        report_dict["message"] = (
+            f"CSV files are not supported for {purpose}. "
+            "Only JSONL and Parquet files are supported."
+        )
+        return report_dict
+
+    report_dict.update(_check_utf8(file))
+
+    if not report_dict["utf8"]:
+        return report_dict
+
+    with file.open() as f:
+        reader = csv.DictReader(f)
+        if not reader.fieldnames:
+            report_dict["message"] = "CSV file is empty or has no header."
+            report_dict["is_check_passed"] = False
+            return report_dict
+        idx = -1
+
+        try:
+            # for loop to iterate through the CSV rows
+            for idx, item in enumerate(reader):
+                if None in item.keys() or None in item.values():
+                    raise InvalidFileFormatError(
+                        message=f"CSV file is malformed or the number of columns found on line {idx + 1} is inconsistent with the header",
+                        line_number=idx + 1,
+                        error_source="format",
+                    )
+
+            report_dict.update(_check_samples_count(file, report_dict, idx))
+            report_dict["load_csv"] = True
+
+        except InvalidFileFormatError as e:
+            report_dict["load_csv"] = False
+            report_dict["is_check_passed"] = False
+            report_dict["message"] = e.message
+            if e.line_number is not None:
+                report_dict["line_number"] = e.line_number
+            if e.error_source is not None:
+                report_dict[e.error_source] = False
+        except ValueError:
+            report_dict["load_csv"] = False
+            if idx < 0:
+                report_dict["message"] = (
+                    "Unable to decode file. "
+                    "File may be empty or in an unsupported format. "
+                )
+            else:
+                report_dict["message"] = (
+                    f"Error parsing the CSV file. Unexpected format on line {idx + 1}."
+                )
+            report_dict["is_check_passed"] = False
+
+    return report_dict
+
+
+def _check_jsonl(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
+    report_dict: Dict[str, Any] = {}
+    report_dict.update(_check_utf8(file))
+    if not report_dict["utf8"]:
         return report_dict
 
     dataset_format = None
@@ -259,84 +365,75 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
                         line_number=idx + 1,
                         error_source="line_type",
                     )
-
-                current_format = None
-                for possible_format in JSONL_REQUIRED_COLUMNS_MAP:
-                    if all(
-                        column in json_line
-                        for column in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
-                    ):
-                        if current_format is None:
-                            current_format = possible_format
-                        elif current_format != possible_format:
-                            raise InvalidFileFormatError(
-                                message="Found multiple dataset formats in the input file. "
-                                f"Got {current_format} and {possible_format} on line {idx + 1}.",
-                                line_number=idx + 1,
-                                error_source="format",
-                            )
-
-                        # Check that there are no extra columns
-                        for column in json_line:
-                            if (
-                                column
-                                not in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
-                            ):
+                # In evals, we don't check the format of the dataset.
+                if purpose != FilePurpose.Eval:
+                    current_format = None
+                    for possible_format in JSONL_REQUIRED_COLUMNS_MAP:
+                        if all(
+                            column in json_line
+                            for column in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
+                        ):
+                            if current_format is None:
+                                current_format = possible_format
+                            elif current_format != possible_format:
                                 raise InvalidFileFormatError(
-                                    message=f'Found extra column "{column}" in the line {idx + 1}.',
+                                    message="Found multiple dataset formats in the input file. "
+                                    f"Got {current_format} and {possible_format} on line {idx + 1}.",
                                     line_number=idx + 1,
                                     error_source="format",
                                 )
 
-                if current_format is None:
-                    raise InvalidFileFormatError(
-                        message=(
-                            f"Error parsing file. Could not detect a format for the line {idx + 1} with the columns:\n"
-                            f"{json_line.keys()}"
-                        ),
-                        line_number=idx + 1,
-                        error_source="format",
-                    )
-                if current_format == DatasetFormat.PREFERENCE_OPENAI:
-                    validate_preference_openai(json_line, idx)
-                elif current_format == DatasetFormat.CONVERSATION:
-                    message_column = JSONL_REQUIRED_COLUMNS_MAP[
-                        DatasetFormat.CONVERSATION
-                    ][0]
-                    validate_messages(json_line[message_column], idx)
-                else:
-                    for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]:
-                        if not isinstance(json_line[column], str):
-                            raise InvalidFileFormatError(
-                                message=f'Invalid value type for "{column}" key on line {idx + 1}. '
-                                f"Expected string. Found {type(json_line[column])}.",
-                                line_number=idx + 1,
-                                error_source="key_value",
-                            )
-
-                if dataset_format is None:
-                    dataset_format = current_format
-                elif current_format is not None:
-                    if current_format != dataset_format:
+                            # Check that there are no extra columns
+                            for column in json_line:
+                                if (
+                                    column
+                                    not in JSONL_REQUIRED_COLUMNS_MAP[possible_format]
+                                ):
+                                    raise InvalidFileFormatError(
+                                        message=f'Found extra column "{column}" in the line {idx + 1}.',
+                                        line_number=idx + 1,
+                                        error_source="format",
+                                    )
+
+                    if current_format is None:
                         raise InvalidFileFormatError(
-                            message="All samples in the dataset must have the same dataset format. "
-                            f"Got {dataset_format} for the first line and {current_format} "
-                            f"for the line {idx + 1}.",
+                            message=(
+                                f"Error parsing file. Could not detect a format for the line {idx + 1} with the columns:\n"
+                                f"{json_line.keys()}"
+                            ),
                             line_number=idx + 1,
                             error_source="format",
                         )
+                    if current_format == DatasetFormat.PREFERENCE_OPENAI:
+                        validate_preference_openai(json_line, idx)
+                    elif current_format == DatasetFormat.CONVERSATION:
+                        message_column = JSONL_REQUIRED_COLUMNS_MAP[
+                            DatasetFormat.CONVERSATION
+                        ][0]
+                        validate_messages(json_line[message_column], idx)
+                    else:
+                        for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]:
+                            if not isinstance(json_line[column], str):
+                                raise InvalidFileFormatError(
+                                    message=f'Invalid value type for "{column}" key on line {idx + 1}. '
+                                    f"Expected string. Found {type(json_line[column])}.",
+                                    line_number=idx + 1,
+                                    error_source="key_value",
+                                )
 
-            if idx + 1 < MIN_SAMPLES:
-                report_dict["has_min_samples"] = False
-                report_dict["message"] = (
-                    f"Processing {file} resulted in only {idx + 1} samples. "
-                    f"Our minimum is {MIN_SAMPLES} samples. "
-                )
-                report_dict["is_check_passed"] = False
-            else:
-                report_dict["num_samples"] = idx + 1
-                report_dict["has_min_samples"] = True
-                report_dict["is_check_passed"] = True
+                    if dataset_format is None:
+                        dataset_format = current_format
+                    elif current_format is not None:
+                        if current_format != dataset_format:
+                            raise InvalidFileFormatError(
+                                message="All samples in the dataset must have the same dataset format. "
+                                f"Got {dataset_format} for the first line and {current_format} "
+                                f"for the line {idx + 1}.",
+                                line_number=idx + 1,
+                                error_source="format",
+                            )
+
+            report_dict.update(_check_samples_count(file, report_dict, idx))
 
         report_dict["load_json"] = True
 
@@ -370,7 +467,7 @@ def _check_jsonl(file: Path) -> Dict[str, Any]:
     return report_dict
 
 
-def _check_parquet(file: Path) -> Dict[str, Any]:
+def _check_parquet(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
     try:
         # Pyarrow is optional as it's large (~80MB) and isn't compatible with older systems.
         from pyarrow import ArrowInvalid, parquet
@@ -380,6 +477,13 @@
         )
 
     report_dict: Dict[str, Any] = {}
+    if purpose == FilePurpose.Eval:
+        report_dict["is_check_passed"] = False
+        report_dict["message"] = (
+            f"Parquet files are not supported for {purpose}. "
+            "Only JSONL and CSV files are supported."
+        )
+        return report_dict
 
     try:
         table = parquet.read_table(str(file), memory_map=True)
@@ -399,6 +503,7 @@
         report_dict["is_check_passed"] = False
         return report_dict
 
+    # Don't check for eval
     for column_name in column_names:
         if column_name not in PARQUET_EXPECTED_COLUMNS:
             report_dict["load_parquet"] = (
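Taken together, `check_file` now threads a `purpose` argument down to the per-format checkers: CSV files are accepted only for the eval purpose, Parquet is rejected for it, and eval JSONL skips the fine-tuning format checks. A minimal sketch of the new validation path, assuming a local UTF-8 CSV with a header row and enough rows to satisfy the minimum-sample check (the path is a placeholder):

```python
from together.types.files import FilePurpose
from together.utils.files import check_file

# Placeholder path; any consistent, UTF-8 CSV with a header row works here.
report = check_file("eval_inputs.csv", purpose=FilePurpose.Eval)

# Keys populated by the new CSV path in this release:
print(report["filetype"])     # "csv"
print(report["load_csv"])     # True when every row matches the header
print(report["num_samples"])  # row count once the minimum-sample check passes
```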
@@ -1,6 +1,6 @@
 Metadata-Version: 2.3
 Name: together
-Version: 1.5.21
+Version: 1.5.23
 Summary: Python client for Together's Cloud Platform!
 License: Apache-2.0
 Author: Together AI
@@ -421,6 +421,33 @@ for model in models:
     print(model)
 ```
 
+### Batch Inference
+
+The batch API allows you to submit larger inference jobs for completion with a 24 hour turn-around time, below is an example. To learn more refer to the [docs here](https://docs.together.ai/docs/batch-inference).
+
+```python
+from together import Together
+
+client = Together()
+
+# Upload the batch file
+batch_file = client.files.upload(file="simpleqa_batch_student.jsonl", purpose="batch-api")
+
+# Create the batch job
+batch = client.batches.create_batch(file_id=batch_file.id, endpoint="/v1/chat/completions")
+
+# Monitor the batch status
+batch_stat = client.batches.get_batch(batch.id)
+
+# List all batches - contains other batches as well
+client.batches.list_batches()
+
+# Download the file content if job completed
+if batch_stat.status == 'COMPLETED':
+    output_response = client.files.retrieve_content(id=batch_stat.output_file_id,
+                                                    output="simpleqa_v3_output.jsonl")
+```
+
 ## Usage – CLI
 
 ### Chat Completions