together 1.5.32__py3-none-any.whl → 1.5.34__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
together/cli/api/finetune.py CHANGED
@@ -1,7 +1,6 @@
  from __future__ import annotations

  import json
- import re
  from datetime import datetime, timezone
  from textwrap import wrap
  from typing import Any, Literal
@@ -9,22 +8,16 @@ from typing import Any, Literal
  import click
  from click.core import ParameterSource  # type: ignore[attr-defined]
  from rich import print as rprint
+ from rich.json import JSON
  from tabulate import tabulate

  from together import Together
- from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX
- from together.types.finetune import (
-     DownloadCheckpointType,
-     FinetuneEventType,
-     FinetuneTrainingLimits,
-     FullTrainingType,
-     LoRATrainingType,
- )
+ from together.cli.api.utils import BOOL_WITH_AUTO, INT_WITH_MAX, generate_progress_bar
+ from together.types.finetune import DownloadCheckpointType, FinetuneTrainingLimits
  from together.utils import (
      finetune_price_to_dollars,
      format_timestamp,
      log_warn,
-     log_warn_once,
      parse_timestamp,
  )
@@ -202,6 +195,12 @@ def fine_tuning(ctx: click.Context) -> None:
      help="Whether to mask the user messages in conversational data or prompts in instruction data. "
      "`auto` will automatically determine whether to mask the inputs based on the data format.",
  )
+ @click.option(
+     "--train-vision",
+     type=bool,
+     default=False,
+     help="Whether to train the vision encoder. Only supported for multimodal models.",
+ )
  @click.option(
      "--from-checkpoint",
      type=str,
@@ -257,6 +256,7 @@ def create(
      lora_dropout: float,
      lora_alpha: float,
      lora_trainable_modules: str,
+     train_vision: bool,
      suffix: str,
      wandb_api_key: str,
      wandb_base_url: str,
@@ -298,6 +298,7 @@ def create(
          lora_dropout=lora_dropout,
          lora_alpha=lora_alpha,
          lora_trainable_modules=lora_trainable_modules,
+         train_vision=train_vision,
          suffix=suffix,
          wandb_api_key=wandb_api_key,
          wandb_base_url=wandb_base_url,
@@ -367,6 +368,10 @@ def create(
          "You have specified a number of evaluation loops but no validation file."
      )

+     if model_limits.supports_vision:
+         # Don't show price estimation for multimodal models yet
+         confirm = True
+
      finetune_price_estimation_result = client.fine_tuning.estimate_price(
          training_file=training_file,
          validation_file=validation_file,
@@ -435,6 +440,9 @@ def list(ctx: click.Context) -> None:
              "Price": f"""${
                  finetune_price_to_dollars(float(str(i.total_price)))
              }""",  # convert to string for mypy typing
+             "Progress": generate_progress_bar(
+                 i, datetime.now().astimezone(), use_rich=False
+             ),
          }
      )
      table = tabulate(display_list, headers="keys", tablefmt="grid", showindex=True)
@@ -454,7 +462,15 @@ def retrieve(ctx: click.Context, fine_tune_id: str) -> None:
      # remove events from response for cleaner output
      response.events = None

-     click.echo(json.dumps(response.model_dump(exclude_none=True), indent=4))
+     rprint(JSON.from_data(response.model_dump(exclude_none=True)))
+     progress_text = generate_progress_bar(
+         response, datetime.now().astimezone(), use_rich=True
+     )
+     status = "Unknown"
+     if response.status is not None:
+         status = response.status.value
+     prefix = f"Status: [bold]{status}[/bold],"
+     rprint(f"{prefix} {progress_text}")


  @fine_tuning.command()
together/cli/api/utils.py CHANGED
@@ -1,10 +1,17 @@
  from __future__ import annotations

+ import math
+ import re
  from gettext import gettext as _
  from typing import Literal
+ from datetime import datetime

  import click

+ from together.types.finetune import FinetuneResponse, COMPLETED_STATUSES
+
+ _PROGRESS_BAR_WIDTH = 40
+

  class AutoIntParamType(click.ParamType):
      name = "integer_or_max"
@@ -49,3 +56,84 @@ class BooleanWithAutoParamType(click.ParamType):

  INT_WITH_MAX = AutoIntParamType()
  BOOL_WITH_AUTO = BooleanWithAutoParamType()
+
+
+ def _human_readable_time(timedelta: float) -> str:
+     """Convert a timedelta to a compact human-readble string
+     Examples:
+         00:00:10 -> 10s
+         01:23:45 -> 1h 23min 45s
+         1 Month 23 days 04:56:07 -> 1month 23d 4h 56min 7s
+     Args:
+         timedelta (float): The timedelta in seconds to convert.
+     Returns:
+         A string representing the timedelta in a human-readable format.
+     """
+     units = [
+         (30 * 24 * 60 * 60, "month"),  # 30 days
+         (24 * 60 * 60, "d"),
+         (60 * 60, "h"),
+         (60, "min"),
+         (1, "s"),
+     ]
+
+     total_seconds = int(timedelta)
+     parts = []
+
+     for unit_seconds, unit_name in units:
+         if total_seconds >= unit_seconds:
+             value = total_seconds // unit_seconds
+             total_seconds %= unit_seconds
+             parts.append(f"{value}{unit_name}")
+
+     return " ".join(parts) if parts else "0s"
+
+
+ def generate_progress_bar(
+     finetune_job: FinetuneResponse, current_time: datetime, use_rich: bool = False
+ ) -> str:
+     """Generate a progress bar for a finetune job.
+     Args:
+         finetune_job: The finetune job to generate a progress bar for.
+         current_time: The current time.
+         use_rich: Whether to use rich formatting.
+     Returns:
+         A string representing the progress bar.
+     """
+     progress = "Progress: [bold red]unavailable[/bold red]"
+     if finetune_job.status in COMPLETED_STATUSES:
+         progress = "Progress: [bold green]completed[/bold green]"
+     elif finetune_job.updated_at is not None:
+         # Replace 'Z' with '+00:00' for Python 3.10 compatibility
+         updated_at_str = finetune_job.updated_at.replace("Z", "+00:00")
+         update_at = datetime.fromisoformat(updated_at_str).astimezone()
+
+         if finetune_job.progress is not None:
+             if current_time < update_at:
+                 return progress
+
+             if not finetune_job.progress.estimate_available:
+                 return progress
+
+             if finetune_job.progress.seconds_remaining <= 0:
+                 return progress
+
+             elapsed_time = (current_time - update_at).total_seconds()
+             ratio_filled = min(
+                 elapsed_time / finetune_job.progress.seconds_remaining, 1.0
+             )
+             percentage = ratio_filled * 100
+             filled = math.ceil(ratio_filled * _PROGRESS_BAR_WIDTH)
+             bar = "█" * filled + "░" * (_PROGRESS_BAR_WIDTH - filled)
+             time_left = "N/A"
+             if finetune_job.progress.seconds_remaining > elapsed_time:
+                 time_left = _human_readable_time(
+                     finetune_job.progress.seconds_remaining - elapsed_time
+                 )
+             time_text = f"{time_left} left"
+             progress = f"Progress: {bar} [bold]{percentage:>3.0f}%[/bold] [yellow]{time_text}[/yellow]"
+
+     if use_rich:
+         return progress
+
+     return re.sub(r"\[/?[^\]]+\]", "", progress)
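To see what these helpers produce, a minimal sketch (it assumes the private helpers remain importable from `together.cli.api.utils`, which is an internal module, and the job ID is a hypothetical placeholder):

```python
# Sketch: duration formatting, mirroring the docstring examples above.
from datetime import datetime

from together import Together
from together.cli.api.utils import _human_readable_time, generate_progress_bar

print(_human_readable_time(10))       # 10s
print(_human_readable_time(5025))     # 1h 23min 45s
print(_human_readable_time(4596967))  # 1month 23d 4h 56min 7s

# generate_progress_bar takes a FinetuneResponse; with use_rich=False the
# rich markup tags are stripped, matching the plain tabulate output above.
client = Together()
job = client.fine_tuning.retrieve("ft-...")  # hypothetical job ID
print(generate_progress_bar(job, datetime.now().astimezone(), use_rich=False))
```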
together/constants.py CHANGED
@@ -1,5 +1,6 @@
  import enum

+
  # Session constants
  TIMEOUT_SECS = 600
  MAX_SESSION_LIFETIME_SECS = 180
@@ -40,6 +41,11 @@ MIN_SAMPLES = 1
  # the number of bytes in a gigabyte, used to convert bytes to GB for readable comparison
  NUM_BYTES_IN_GB = 2**30

+ # Multimodal limits
+ MAX_IMAGES_PER_EXAMPLE = 10
+ MAX_IMAGE_BYTES = 10 * 1024 * 1024  # 10MB
+ # Max length = Header length + base64 factor (4/3) * image bytes
+ MAX_BASE64_IMAGE_LENGTH = len("data:image/jpeg;base64,") + 4 * MAX_IMAGE_BYTES // 3

  # expected columns for Parquet files
  PARQUET_EXPECTED_COLUMNS = ["input_ids", "attention_mask", "labels"]
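The length cap works out as follows; a standalone sketch of the same arithmetic (constants copied from the hunk above):

```python
# Base64 encodes every 3 bytes of input as 4 output characters, so a
# 10 MB image becomes roughly 4/3 as many characters, plus the data-URL header.
MAX_IMAGE_BYTES = 10 * 1024 * 1024            # 10 MB = 10485760 bytes
header = "data:image/jpeg;base64,"            # 23 characters
max_len = len(header) + 4 * MAX_IMAGE_BYTES // 3
print(max_len)  # 13981036
```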
together/resources/finetune.py CHANGED
@@ -2,7 +2,7 @@ from __future__ import annotations

  import re
  from pathlib import Path
- from typing import List, Dict, Literal
+ from typing import Dict, List, Literal

  from rich import print as rprint

@@ -18,10 +18,11 @@ from together.types import (
      FinetuneList,
      FinetuneListEvents,
      FinetuneLRScheduler,
-     FinetuneRequest,
-     FinetuneResponse,
+     FinetuneMultimodalParams,
      FinetunePriceEstimationRequest,
      FinetunePriceEstimationResponse,
+     FinetuneRequest,
+     FinetuneResponse,
      FinetuneTrainingLimits,
      FullTrainingType,
      LinearLRScheduler,
@@ -73,6 +74,7 @@ def create_finetune_request(
      lora_dropout: float | None = 0,
      lora_alpha: float | None = None,
      lora_trainable_modules: str | None = "all-linear",
+     train_vision: bool = False,
      suffix: str | None = None,
      wandb_api_key: str | None = None,
      wandb_base_url: str | None = None,
@@ -252,6 +254,15 @@ def create_finetune_request(
          simpo_gamma=simpo_gamma,
      )

+     if model_limits.supports_vision:
+         multimodal_params = FinetuneMultimodalParams(train_vision=train_vision)
+     elif not model_limits.supports_vision and train_vision:
+         raise ValueError(
+             f"Vision encoder training is not supported for the non-multimodal model `{model}`"
+         )
+     else:
+         multimodal_params = None
+
      finetune_request = FinetuneRequest(
          model=model,
          training_file=training_file,
@@ -272,6 +283,7 @@ def create_finetune_request(
          wandb_project_name=wandb_project_name,
          wandb_name=wandb_name,
          training_method=training_method_cls,
+         multimodal_params=multimodal_params,
          from_checkpoint=from_checkpoint,
          from_hf_model=from_hf_model,
          hf_model_revision=hf_model_revision,
@@ -342,6 +354,7 @@ class FineTuning:
          lora_dropout: float | None = 0,
          lora_alpha: float | None = None,
          lora_trainable_modules: str | None = "all-linear",
+         train_vision: bool = False,
          suffix: str | None = None,
          wandb_api_key: str | None = None,
          wandb_base_url: str | None = None,
@@ -387,6 +400,7 @@ class FineTuning:
              lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
              lora_alpha (float, optional): Alpha for LoRA adapters. Defaults to 8.
              lora_trainable_modules (str, optional): Trainable modules for LoRA adapters. Defaults to "all-linear".
+             train_vision (bool, optional): Whether to train vision encoder in multimodal models. Defaults to False.
              suffix (str, optional): Up to 40 character suffix that will be added to your fine-tuned model name.
                  Defaults to None.
              wandb_api_key (str, optional): API key for Weights & Biases integration.
@@ -464,6 +478,7 @@ class FineTuning:
              lora_dropout=lora_dropout,
              lora_alpha=lora_alpha,
              lora_trainable_modules=lora_trainable_modules,
+             train_vision=train_vision,
              suffix=suffix,
              wandb_api_key=wandb_api_key,
              wandb_base_url=wandb_base_url,
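End to end, the new capability is a single keyword argument on the SDK; a hedged sketch (the file ID and model name are illustrative placeholders, not values from this diff):

```python
# Sketch: enabling vision-encoder training on a multimodal model.
# create_finetune_request (above) raises ValueError if the model's limits
# do not report supports_vision.
from together import Together

client = Together()
job = client.fine_tuning.create(
    training_file="file-...",       # hypothetical file ID
    model="a-multimodal-model",     # placeholder; must support vision
    train_vision=True,
)
print(job.id)
```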
@@ -906,6 +921,7 @@ class AsyncFineTuning:
          lora_dropout: float | None = 0,
          lora_alpha: float | None = None,
          lora_trainable_modules: str | None = "all-linear",
+         train_vision: bool = False,
          suffix: str | None = None,
          wandb_api_key: str | None = None,
          wandb_base_url: str | None = None,
@@ -951,6 +967,7 @@ class AsyncFineTuning:
              lora_dropout (float, optional): Dropout rate for LoRA adapters. Defaults to 0.
              lora_alpha (float, optional): Alpha for LoRA adapters. Defaults to 8.
              lora_trainable_modules (str, optional): Trainable modules for LoRA adapters. Defaults to "all-linear".
+             train_vision (bool, optional): Whether to train vision encoder in multimodal models. Defaults to False.
              suffix (str, optional): Up to 40 character suffix that will be added to your fine-tuned model name.
                  Defaults to None.
              wandb_api_key (str, optional): API key for Weights & Biases integration.
@@ -1028,6 +1045,7 @@ class AsyncFineTuning:
              lora_dropout=lora_dropout,
              lora_alpha=lora_alpha,
              lora_trainable_modules=lora_trainable_modules,
+             train_vision=train_vision,
              suffix=suffix,
              wandb_api_key=wandb_api_key,
              wandb_base_url=wandb_base_url,
@@ -1046,7 +1064,11 @@
              hf_output_repo_name=hf_output_repo_name,
          )

-         if from_checkpoint is None and from_hf_model is None:
+         if (
+             from_checkpoint is None
+             and from_hf_model is None
+             and not model_limits.supports_vision
+         ):
              price_estimation_result = await self.estimate_price(
                  training_file=training_file,
                  validation_file=validation_file,
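The async client mirrors this, and per the hunk above it also skips price estimation for vision-capable models; a minimal sketch under the same placeholder assumptions:

```python
# Sketch: the same flag via the async client. IDs are placeholders.
import asyncio

from together import AsyncTogether


async def main() -> None:
    client = AsyncTogether()
    job = await client.fine_tuning.create(
        training_file="file-...",       # hypothetical file ID
        model="a-multimodal-model",     # placeholder; must support vision
        train_vision=True,
    )
    print(job.id)


asyncio.run(main())
```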
together/types/__init__.py CHANGED
@@ -7,17 +7,18 @@ from together.types.audio_speech import (
      AudioSpeechStreamChunk,
      AudioSpeechStreamEvent,
      AudioSpeechStreamResponse,
+     AudioTimestampGranularities,
      AudioTranscriptionRequest,
-     AudioTranslationRequest,
      AudioTranscriptionResponse,
+     AudioTranscriptionResponseFormat,
      AudioTranscriptionVerboseResponse,
+     AudioTranslationRequest,
      AudioTranslationResponse,
      AudioTranslationVerboseResponse,
-     AudioTranscriptionResponseFormat,
-     AudioTimestampGranularities,
      ModelVoices,
      VoiceListResponse,
  )
+ from together.types.batch import BatchEndpoint, BatchJob, BatchJobStatus
  from together.types.chat_completions import (
      ChatCompletionChunk,
      ChatCompletionRequest,
@@ -31,6 +32,19 @@ from together.types.completions import (
  )
  from together.types.embeddings import EmbeddingRequest, EmbeddingResponse
  from together.types.endpoints import Autoscaling, DedicatedEndpoint, ListEndpoint
+ from together.types.evaluation import (
+     ClassifyParameters,
+     CompareParameters,
+     EvaluationCreateResponse,
+     EvaluationJob,
+     EvaluationRequest,
+     EvaluationStatus,
+     EvaluationStatusResponse,
+     EvaluationType,
+     JudgeModelConfig,
+     ModelRequest,
+     ScoreParameters,
+ )
  from together.types.files import (
      FileDeleteResponse,
      FileList,
@@ -41,49 +55,32 @@ from together.types.files import (
      FileType,
  )
  from together.types.finetune import (
-     TrainingMethodDPO,
-     TrainingMethodSFT,
-     FinetuneCheckpoint,
      CosineLRScheduler,
      CosineLRSchedulerArgs,
+     FinetuneCheckpoint,
+     FinetuneDeleteResponse,
      FinetuneDownloadResult,
-     LinearLRScheduler,
-     LinearLRSchedulerArgs,
-     FinetuneLRScheduler,
      FinetuneList,
      FinetuneListEvents,
-     FinetuneRequest,
-     FinetuneResponse,
+     FinetuneLRScheduler,
+     FinetuneMultimodalParams,
      FinetunePriceEstimationRequest,
      FinetunePriceEstimationResponse,
-     FinetuneDeleteResponse,
+     FinetuneRequest,
+     FinetuneResponse,
      FinetuneTrainingLimits,
      FullTrainingType,
+     LinearLRScheduler,
+     LinearLRSchedulerArgs,
      LoRATrainingType,
+     TrainingMethodDPO,
+     TrainingMethodSFT,
      TrainingType,
  )
  from together.types.images import ImageRequest, ImageResponse
  from together.types.models import ModelObject, ModelUploadRequest, ModelUploadResponse
  from together.types.rerank import RerankRequest, RerankResponse
- from together.types.batch import BatchJob, BatchJobStatus, BatchEndpoint
- from together.types.evaluation import (
-     EvaluationType,
-     EvaluationStatus,
-     JudgeModelConfig,
-     ModelRequest,
-     ClassifyParameters,
-     ScoreParameters,
-     CompareParameters,
-     EvaluationRequest,
-     EvaluationCreateResponse,
-     EvaluationJob,
-     EvaluationStatusResponse,
- )
- from together.types.videos import (
-     CreateVideoBody,
-     CreateVideoResponse,
-     VideoJob,
- )
+ from together.types.videos import CreateVideoBody, CreateVideoResponse, VideoJob


  __all__ = [
@@ -131,6 +128,7 @@ __all__ = [
      "RerankRequest",
      "RerankResponse",
      "FinetuneTrainingLimits",
+     "FinetuneMultimodalParams",
      "AudioSpeechRequest",
      "AudioResponseFormat",
      "AudioLanguage",
together/types/finetune.py CHANGED
@@ -1,14 +1,12 @@
  from __future__ import annotations

  from enum import Enum
- from typing import List, Literal, Any
+ from typing import Any, List, Literal

  from pydantic import Field, StrictBool, field_validator

  from together.types.abstract import BaseModel
- from together.types.common import (
-     ObjectType,
- )
+ from together.types.common import ObjectType


  class FinetuneJobStatus(str, Enum):
@@ -28,6 +26,14 @@ class FinetuneJobStatus(str, Enum):
      STATUS_COMPLETED = "completed"


+ COMPLETED_STATUSES = [
+     FinetuneJobStatus.STATUS_ERROR,
+     FinetuneJobStatus.STATUS_USER_ERROR,
+     FinetuneJobStatus.STATUS_COMPLETED,
+     FinetuneJobStatus.STATUS_CANCELLED,
+ ]
+
+
  class FinetuneEventLevels(str, Enum):
      """
      Fine-tune job event status levels
@@ -167,6 +173,23 @@ class TrainingMethodDPO(TrainingMethod):
      simpo_gamma: float | None = None


+ class FinetuneMultimodalParams(BaseModel):
+     """
+     Multimodal parameters
+     """
+
+     train_vision: bool = False
+
+
+ class FinetuneProgress(BaseModel):
+     """
+     Fine-tune job progress
+     """
+
+     estimate_available: bool = False
+     seconds_remaining: float = 0
+
+
  class FinetuneRequest(BaseModel):
      """
      Fine-tune request type
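A quick sketch of the two new models in isolation (fields and defaults exactly as declared in the hunk above):

```python
# Sketch: constructing the new pydantic models directly.
from together.types.finetune import FinetuneMultimodalParams, FinetuneProgress

params = FinetuneMultimodalParams(train_vision=True)
progress = FinetuneProgress(estimate_available=True, seconds_remaining=1800.0)
print(params.train_vision, progress.seconds_remaining)  # True 1800.0
```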
@@ -214,6 +237,8 @@ class FinetuneRequest(BaseModel):
      )
      # from step
      from_checkpoint: str | None = None
+     # multimodal parameters
+     multimodal_params: FinetuneMultimodalParams | None = None
      # hf related fields
      hf_api_token: str | None = None
      hf_output_repo_name: str | None = None
@@ -296,6 +321,10 @@ class FinetuneResponse(BaseModel):
      training_file_size: int | None = Field(None, alias="TrainingFileSize")
      train_on_inputs: StrictBool | Literal["auto"] | None = "auto"
      from_checkpoint: str | None = None
+     # multimodal parameters
+     multimodal_params: FinetuneMultimodalParams | None = None
+
+     progress: FinetuneProgress | None = None

      @field_validator("training_type")
      @classmethod
@@ -318,8 +347,8 @@ class FinetunePriceEstimationRequest(BaseModel):
      model: str
      n_epochs: int
      n_evals: int
-     training_type: TrainingType
-     training_method: TrainingMethod
+     training_type: LoRATrainingType | FullTrainingType
+     training_method: TrainingMethodSFT | TrainingMethodDPO


  class FinetunePriceEstimationResponse(BaseModel):
@@ -390,6 +419,7 @@ class FinetuneTrainingLimits(BaseModel):
      min_learning_rate: float
      full_training: FinetuneFullTrainingLimits | None = None
      lora_training: FinetuneLoraTrainingLimits | None = None
+     supports_vision: bool = False


  class LinearLRSchedulerArgs(BaseModel):
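Since `supports_vision` defaults to `False`, callers can probe a model's limits before requesting vision training; a hedged sketch (the model name is a placeholder, and `get_model_limits` is the accessor this SDK's CLI uses to fetch `FinetuneTrainingLimits`):

```python
# Sketch: gate train_vision on the model's reported limits.
from together import Together

client = Together()
limits = client.fine_tuning.get_model_limits(model="a-multimodal-model")  # placeholder
print(limits.supports_vision)  # False unless the API reports vision support
```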
together/utils/files.py CHANGED
@@ -1,8 +1,8 @@
  from __future__ import annotations

+ import csv
  import json
  import os
- import csv
  from pathlib import Path
  from traceback import format_exc
  from typing import Any, Dict, List
@@ -10,18 +10,30 @@ from typing import Any, Dict, List
  from tqdm import tqdm

  from together.constants import (
+     JSONL_REQUIRED_COLUMNS_MAP,
+     MAX_BASE64_IMAGE_LENGTH,
      MAX_FILE_SIZE_GB,
+     MAX_IMAGES_PER_EXAMPLE,
      MIN_SAMPLES,
      NUM_BYTES_IN_GB,
      PARQUET_EXPECTED_COLUMNS,
-     JSONL_REQUIRED_COLUMNS_MAP,
-     REQUIRED_COLUMNS_MESSAGE,
      POSSIBLE_ROLES_CONVERSATION,
+     REQUIRED_COLUMNS_MESSAGE,
      DatasetFormat,
  )
  from together.types import FilePurpose


+ # MessageContent is a string or a list of dicts with 'type': 'text' or 'image_url', and 'text' or 'image_url.url'
+ # Example: "Hello" or [
+ #     {"type": "text", "text": "Hello"},
+ #     {"type": "image_url", "image_url": {
+ #         "url": "data:image/jpeg;base64,..."
+ #     }}
+ # ]
+ MessageContent = str | list[dict[str, Any]]
+
+
  class InvalidFileFormatError(ValueError):
      """Exception raised for invalid file formats during file checks."""

@@ -70,7 +82,7 @@ def check_file(

      if file_size > MAX_FILE_SIZE_GB * NUM_BYTES_IN_GB:
          report_dict["message"] = (
-             f"Maximum supported file size is {MAX_FILE_SIZE_GB} GB. Found file with size of {round(file_size / NUM_BYTES_IN_GB ,3)} GB."
+             f"Maximum supported file size is {MAX_FILE_SIZE_GB} GB. Found file with size of {round(file_size / NUM_BYTES_IN_GB, 3)} GB."
          )
          report_dict["is_check_passed"] = False
      elif file_size == 0:
@@ -103,7 +115,9 @@ def check_file(
      return report_dict


- def _check_conversation_type(messages: List[Dict[str, str | bool]], idx: int) -> None:
+ def _check_conversation_type(
+     messages: List[Dict[str, str | int | MessageContent]], idx: int
+ ) -> None:
      """Check that the conversation has correct type.

      Args:
@@ -145,12 +159,6 @@ def _check_conversation_type(
                  line_number=idx + 1,
                  error_source="key_value",
              )
-             if not isinstance(message[column], str):
-                 raise InvalidFileFormatError(
-                     message=f"Column `{column}` is not a string on line {idx + 1}. Found {type(message[column])}",
-                     line_number=idx + 1,
-                     error_source="text_field",
-                 )


  def _check_conversation_roles(
@@ -175,7 +183,9 @@ def _check_conversation_roles(
          )


- def _check_message_weight(message: Dict[str, str | bool], idx: int) -> None:
+ def _check_message_weight(
+     message: Dict[str, str | int | MessageContent], idx: int
+ ) -> int | None:
      """Check that the message has a weight with the correct type and value.

      Args:
@@ -199,11 +209,14 @@ def _check_message_weight(
                  line_number=idx + 1,
                  error_source="key_value",
              )
+         return weight
+
+     return None


  def _check_message_role(
-     message: Dict[str, str | bool], previous_role: str | None, idx: int
- ) -> str | bool:
+     message: Dict[str, str | int | MessageContent], previous_role: str | None, idx: int
+ ) -> str:
      """Check that the message has correct roles.

      Args:
@@ -217,6 +230,14 @@ def _check_message_role(
      Raises:
          InvalidFileFormatError: If the message role is invalid.
      """
+     if not isinstance(message["role"], str):
+         raise InvalidFileFormatError(
+             message=f"Invalid role `{message['role']}` in conversation on line {idx + 1}. "
+             f"Role must be a string. Found {type(message['role'])}",
+             line_number=idx + 1,
+             error_source="key_value",
+         )
+
      if message["role"] not in POSSIBLE_ROLES_CONVERSATION:
          raise InvalidFileFormatError(
              message=f"Invalid role `{message['role']}` in conversation on line {idx + 1}. "
@@ -234,8 +255,134 @@ def _check_message_role(
      return message["role"]


+ def _check_message_content(
+     message_content: str | int | MessageContent, role: str, idx: int
+ ) -> tuple[bool, int]:
+     """Check that the message content has the correct type.
+     Message content can be either a) a string or b) an OpenAI-style multimodal list of content items
+     Example:
+         a) "Hello", or
+         b) [
+             {"type": "text", "text": "Hello"},
+             {"type": "image_url", "image_url": {
+                 "url": "data:image/jpeg;base64,..."
+             }}
+         ]
+
+     Args:
+         message: The message to check.
+         role: The role of the message.
+         idx: Line number in the file.
+
+     Returns:
+         tuple[bool, int]: A tuple with message is multimodal and the number of images in the message content.
+     """
+     # Text-only message content
+     if isinstance(message_content, str):
+         return False, 0
+
+     # Multimodal message content
+     if isinstance(message_content, list):
+         num_images = 0
+         for item in message_content:
+             if not isinstance(item, dict):
+                 raise InvalidFileFormatError(
+                     "The dataset is malformed, the `content` field must be a list of dicts.",
+                     line_number=idx + 1,
+                     error_source="key_value",
+                 )
+             if "type" not in item:
+                 raise InvalidFileFormatError(
+                     "The dataset is malformed, the `content` field must be a list of dicts with a `type` field.",
+                     line_number=idx + 1,
+                     error_source="key_value",
+                 )
+
+             if item["type"] == "text":
+                 if "text" not in item or not isinstance(item["text"], str):
+                     raise InvalidFileFormatError(
+                         "The dataset is malformed, the `text` field must be present in the `content` item field and be"
+                         f" a string. Got '{item.get('text')!r}' instead.",
+                         line_number=idx + 1,
+                         error_source="key_value",
+                     )
+             elif item["type"] == "image_url":
+                 if role != "user":
+                     raise InvalidFileFormatError(
+                         "The dataset is malformed, only user messages can contain images.",
+                         line_number=idx + 1,
+                         error_source="key_value",
+                     )
+
+                 if "image_url" not in item or not isinstance(item["image_url"], dict):
+                     raise InvalidFileFormatError(
+                         "The dataset is malformed, the `image_url` field must be present in the `content` field and "
+                         f"be a dictionary. Got {item.get('image_url')!r} instead.",
+                         line_number=idx + 1,
+                         error_source="key_value",
+                     )
+
+                 image_data = item["image_url"].get("url")
+                 if not image_data or not isinstance(image_data, str):
+                     raise InvalidFileFormatError(
+                         "The dataset is malformed, the `url` field must be present in the `image_url` field and be "
+                         f"a string. Got {image_data!r} instead.",
+                         line_number=idx + 1,
+                         error_source="key_value",
+                     )
+
+                 if not any(
+                     image_data.startswith(f"data:image/{fmt};base64,")
+                     for fmt in ["jpeg", "png", "webp"]
+                 ):
+                     raise InvalidFileFormatError(
+                         "The dataset is malformed, the `url` field must be either a JPEG, PNG or WEBP base64-encoded "
+                         "image in 'data:image/<format>;base64,<base64_encoded_image>' format. "
+                         f"Got '{image_data[:100]}...' instead.",
+                         line_number=idx + 1,
+                     )
+
+                 if len(image_data) > MAX_BASE64_IMAGE_LENGTH:
+                     raise InvalidFileFormatError(
+                         "The dataset is malformed, the `url` field must contain base64-encoded image "
+                         f"that is less than 10MB, found ~{len(image_data) * 3 // 4} bytes.",
+                         line_number=idx + 1,
+                         error_source="key_value",
+                     )
+
+                 num_images += 1
+             else:
+                 raise InvalidFileFormatError(
+                     "The dataset is malformed, the `type` field must be either 'text' or 'image_url'. "
+                     f"Got {item['type']!r}.",
+                     line_number=idx + 1,
+                     error_source="key_value",
+                 )
+
+         if num_images > MAX_IMAGES_PER_EXAMPLE:
+             raise InvalidFileFormatError(
+                 f"The dataset is malformed, the `content` field must contain at most "
+                 f"{MAX_IMAGES_PER_EXAMPLE} images, found {num_images}.",
+                 line_number=idx + 1,
+                 error_source="key_value",
+             )
+
+         # We still consider text-only messages in such format as multimodal, even if they don't have any images
+         # included - so we can process datasets with rather sparse images (i.e. not in each sample) consistently.
+         return True, num_images
+
+     raise InvalidFileFormatError(
+         f"Invalid content type on line {idx + 1} of the input file. Expected string or multimodal list of dicts, "
+         f"found {type(message_content)}",
+         line_number=idx + 1,
+         error_source="key_value",
+     )


  def validate_messages(
-     messages: List[Dict[str, str | bool]], idx: int, require_assistant_role: bool = True
+     messages: List[Dict[str, str | int | MessageContent]],
+     idx: int,
+     require_assistant_role: bool = True,
  ) -> None:
      """Validate the messages column.

@@ -249,15 +396,45 @@ def validate_messages(
      """
      _check_conversation_type(messages, idx)

-     has_weights = any("weight" in message for message in messages)
      previous_role = None
      assistant_role_exists = False

+     messages_are_multimodal: bool | None = None
+     total_number_of_images = 0
+
      for message in messages:
-         if has_weights:
-             _check_message_weight(message, idx)
+         message_weight = _check_message_weight(message, idx)
          previous_role = _check_message_role(message, previous_role, idx)
          assistant_role_exists |= previous_role == "assistant"
+         is_multimodal, number_of_images = _check_message_content(
+             message["content"], role=previous_role, idx=idx
+         )
+         # Multimodal validation
+         if number_of_images > 0 and message_weight is not None and message_weight != 0:
+             raise InvalidFileFormatError(
+                 "Messages with images cannot have non-zero weights.",
+                 line_number=idx + 1,
+                 error_source="key_value",
+             )
+         if messages_are_multimodal is None:
+             # Detect the format of the messages in the conversation.
+             messages_are_multimodal = is_multimodal
+         elif messages_are_multimodal != is_multimodal:
+             # Due to the format limitation, we cannot mix multimodal and text only messages in the same sample.
+             raise InvalidFileFormatError(
+                 "Messages in the conversation must be either all in multimodal or all in text-only format.",
+                 line_number=idx + 1,
+                 error_source="key_value",
+             )
+         total_number_of_images += number_of_images
+
+     if total_number_of_images > MAX_IMAGES_PER_EXAMPLE:
+         raise InvalidFileFormatError(
+             f"The dataset is malformed, the `messages` must contain at most {MAX_IMAGES_PER_EXAMPLE} images. "
+             f"Found {total_number_of_images} images.",
+             line_number=idx + 1,
+             error_source="key_value",
+         )

      _check_conversation_roles(require_assistant_role, assistant_role_exists, idx)

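To make the accepted shape concrete, here is one JSONL record that should pass the checks above (content invented for illustration; the image sits on a user turn, and all messages in a sample share one format):

```python
# Sketch: a conversation record in the multimodal format validated above.
import json

record = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this picture?"},
                # Truncated base64 payload, purely illustrative.
                {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}},
            ],
        },
        # The assistant reply must also use the content-list format, since
        # multimodal and text-only messages cannot be mixed in one sample;
        # image-bearing messages must have weight 0 or no weight at all.
        {"role": "assistant", "content": [{"type": "text", "text": "A cat on a sofa."}]},
    ]
}
print(json.dumps(record)[:80], "...")
```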
 
@@ -347,12 +524,7 @@ def validate_preference_openai(example: Dict[str, Any], idx: int = 0) -> None:
              error_source="key_value",
          )

-         if not isinstance(example[key][0]["content"], str):
-             raise InvalidFileFormatError(
-                 message=f"The dataset is malformed, the 'content' field in `{key}` must be a string on line {idx + 1}.",
-                 line_number=idx + 1,
-                 error_source="key_value",
-             )
+         _check_message_content(example[key][0]["content"], role="assistant", idx=idx)


  def _check_utf8(file: Path) -> Dict[str, Any]:
@@ -454,8 +626,7 @@ def _check_csv(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
          report_dict["load_csv"] = False
          if idx < 0:
              report_dict["message"] = (
-                 "Unable to decode file. "
-                 "File may be empty or in an unsupported format. "
+                 "Unable to decode file. File may be empty or in an unsupported format. "
              )
          else:
              report_dict["message"] = (
@@ -542,13 +713,10 @@ def _check_jsonl(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
              )
          else:
              for column in JSONL_REQUIRED_COLUMNS_MAP[current_format]:
-                 if not isinstance(json_line[column], str):
-                     raise InvalidFileFormatError(
-                         message=f'Invalid value type for "{column}" key on line {idx + 1}. '
-                         f"Expected string. Found {type(json_line[column])}.",
-                         line_number=idx + 1,
-                         error_source="key_value",
-                     )
+                 role = "assistant" if column in {"completion"} else "user"
+                 _check_message_content(
+                     json_line[column], role=role, idx=idx
+                 )

          if dataset_format is None:
              dataset_format = current_format
@@ -578,8 +746,7 @@ def _check_jsonl(file: Path, purpose: FilePurpose | str) -> Dict[str, Any]:
          report_dict["load_json"] = False
          if idx < 0:
              report_dict["message"] = (
-                 "Unable to decode file. "
-                 "File may be empty or in an unsupported format. "
+                 "Unable to decode file. File may be empty or in an unsupported format. "
              )
          else:
              report_dict["message"] = (
together-1.5.34.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: together
- Version: 1.5.32
+ Version: 1.5.34
  Summary: Python client for Together's Cloud Platform! Note: SDK 2.0 is now available at https://github.com/togethercomputer/together-py
  License: Apache-2.0
  License-File: LICENSE
together-1.5.34.dist-info/RECORD CHANGED
@@ -8,13 +8,13 @@ together/cli/api/completions.py,sha256=l-Zw5t7hojL3w8xd_mitS2NRB72i5Z0xwkzH0rT5X
  together/cli/api/endpoints.py,sha256=S3px19iGTKy5KS1nuKrvUUMoqc_KtrZHyIwjwjqX7uQ,14624
  together/cli/api/evaluation.py,sha256=36SsujC5qicf-8l8GA8wqRtEC8NKzsAjL-_nYhePpQM,14691
  together/cli/api/files.py,sha256=QLYEXRkY8J2Gg1SbTCtzGfoTMvosoeACNK83L_oLubs,3397
- together/cli/api/finetune.py,sha256=Hmn8UrDNCPiLPDilnKPjnx8V27WliAVTZgQKb6SnHwc,19625
+ together/cli/api/finetune.py,sha256=fgzZ4QDWU5sr-Fw0vLkPHvz-qSzLq_WIJr9kGpqlzhk,20332
  together/cli/api/images.py,sha256=GADSeaNUHUVMtWovmccGuKc28IJ9E_v4vAEwYHJhu5o,2645
  together/cli/api/models.py,sha256=BRWRiguuJ8OwAD8crajpZ7RyCHA35tyOZvi3iLWQ7k4,3679
- together/cli/api/utils.py,sha256=IuqYWPnLI38_Bqd7lj8V_SnGdYc59pRmMbQmciS4FsM,1326
+ together/cli/api/utils.py,sha256=MRK6siAmDtVuXz4Vi0Jkf9RhlSwB2Sjk88OK34Z-I6E,4388
  together/cli/cli.py,sha256=PVahUjOfAQIjo209FoPKljcCA_OIpOYQ9MAsCjfEMu0,2134
  together/client.py,sha256=KD33kAPkWTcnXjge4rLK_L3UsJYsxNUkvL6b9SgTEf0,6324
- together/constants.py,sha256=IaKMIamFia9nyq8jPrmqu5y0YL5mC_474AAIUXYFsdk,1964
+ together/constants.py,sha256=vhdf231xNLw9jMVSyFZrcDLCEbuYcXr4U_K_MX9q_xk,2205
  together/error.py,sha256=HU6247CyzCFjaxL9A0XYbXZ6fY_ebRg0FEYjI4Skogs,5515
  together/filemanager.py,sha256=bynQp2yGoFMZcgVtgFlkYxTbnk6n_GxdiEpY0q50kbk,19448
  together/legacy/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -40,13 +40,13 @@ together/resources/embeddings.py,sha256=PTvLb82yjG_-iQOyuhsilp77Fr7gZ0o6WD2KeRnK
  together/resources/endpoints.py,sha256=BP75wUEcOtpiUbfLAQH5GX2RL8_RnM522-D8Iz7_LUU,20378
  together/resources/evaluation.py,sha256=eYSs9HUpW51XZjX-yNlFZlLapsuEDINJ8BjxJoYa4U0,31443
  together/resources/files.py,sha256=_uK5xzriXNOGNw3tQGuTbCaxBRo6Az6_cXOUtBNFzDk,5434
- together/resources/finetune.py,sha256=rOclrA4GCu1wrE-D0-hc0ac7lJksucbIW6OOxQT0q7I,52981
+ together/resources/finetune.py,sha256=phlG5y6lHwoGC_eZbAU8Wx5Ohb4MKZwZD5AjEAoLpR4,53907
  together/resources/images.py,sha256=FHXkcnzyj2JLw4YF1NH56hgISEeCO0Sg_SvTCcTJaOo,4831
  together/resources/models.py,sha256=WpP-x25AXYpmu-VKu_X4Up-zHwpWBBvPRpbV4FsWQrU,8266
  together/resources/rerank.py,sha256=3Ju_aRSyZ1s_3zCSNZnSnEJErUVmt2xa3M8z1nvejMA,3931
  together/resources/videos.py,sha256=Dn7vslH1pZVw4WYvH-69fjzqLZdKHkTK-lIbFkxh0w0,11144
  together/together_response.py,sha256=a3dgKMPDrlfKQwxYENfNt2T4l2vSZxRWMixhHSy-q3E,1308
- together/types/__init__.py,sha256=nh6yT1mmlmkLGQE3DYeJYNkSAIIIxNep15jwZWICz40,4492
+ together/types/__init__.py,sha256=WQLU_r3Decd09rpA8XGYqKXB5ZnPgyaUsTaF7010cr8,4537
  together/types/abstract.py,sha256=1lFQI_3WjsR_t1128AeKW0aTk6EiM6Gh1J3ZuyLLPao,642
  together/types/audio_speech.py,sha256=pUzqpx7NCjtPIq91xO2k0psetzLz29NTHHm6DS0k8Xg,9682
  together/types/batch.py,sha256=KiI5i1En7cyIUxHhVIGoQk6Wlw19c0PXSqDWwc2KZ2c,1140
@@ -59,7 +59,7 @@ together/types/endpoints.py,sha256=EzNhHOoQ_D9fUdNQtxQPeSWiFzdFLqpNodN0YLmv_h0,4
  together/types/error.py,sha256=OVlCs3cx_2WhZK4JzHT8SQyRIIqKOP1AZQ4y1PydjAE,370
  together/types/evaluation.py,sha256=9gCAgzAwFD95MWnSgvxnSYFF27wKOTqIGn-wSOpFt2M,2385
  together/types/files.py,sha256=_pB_q8kU5QH7WE3Y8Uro6LGsgK_5zrGYzJREZL9cRH0,2025
- together/types/finetune.py,sha256=vpbmyRRV0gJryi0F7YUIbUk5Ya8CPmi0mJ95ZjpfpbE,11959
+ together/types/finetune.py,sha256=VB91_eXWlzdZY4qrxGo5_U1naTnx6Thsqi7vlflVjhY,12710
  together/types/images.py,sha256=IsrmIM2FVeG-kP4vhZUx5fG5EhOJ-d8fefrAmOVKNDs,926
  together/types/models.py,sha256=V8bcy1c3uTmqwnTVphbYLF2AJ6l2P2724njl36TzfHQ,2878
  together/types/rerank.py,sha256=qZfuXOn7MZ6ly8hpJ_MZ7OU_Bi1-cgYNSB20Wja8Qkk,1061
@@ -67,11 +67,11 @@ together/types/videos.py,sha256=KCLk8CF0kbA_51qnHOzAWg5VA6HTlwnY-sTZ2lUR0Eo,1861
  together/utils/__init__.py,sha256=5fqvj4KT2rHxKSQot2TSyV_HcvkvkGiqAiaYuJwqtm0,786
  together/utils/_log.py,sha256=5IYNI-jYzxyIS-pUvhb0vE_Muo3MA7GgBhsu66TKP2w,1951
  together/utils/api_helpers.py,sha256=2K0O6qeEQ2zVFvi5NBN5m2kjZJaS3-JfKFecQ7SmGaw,3746
- together/utils/files.py,sha256=mWFFpsgVPDQg1ZCb-oTrDUFv3aXg1AItgtwXvDsFegI,25047
+ together/utils/files.py,sha256=hgXr-7cHZPCIgwStgKrZbGMbAasiOCuoqlv6WDApQWc,32022
  together/utils/tools.py,sha256=H2MTJhEqtBllaDvOyZehIO_IVNK3P17rSDeILtJIVag,2964
  together/version.py,sha256=p03ivHyE0SyWU4jAnRTBi_sOwywVWoZPU4g2gzRgG-Y,126
- together-1.5.32.dist-info/METADATA,sha256=lQExfe_6VE3LiQDX6E3zbVVsNwlPZ2vzQMuxtTaV7M8,17415
- together-1.5.32.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
- together-1.5.32.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
- together-1.5.32.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
- together-1.5.32.dist-info/RECORD,,
+ together-1.5.34.dist-info/METADATA,sha256=i3ArZRZ58P5FnityXKBWyKCB03rdK6N1QQ06_L3CyEI,17415
+ together-1.5.34.dist-info/WHEEL,sha256=zp0Cn7JsFoX2ATtOhtaFYIiE2rmFAD4OcMhtUki8W3U,88
+ together-1.5.34.dist-info/entry_points.txt,sha256=G-b5NKW6lUUf1V1fH8IPTBb7jXnK7lhbX9H1zTEJXPs,50
+ together-1.5.34.dist-info/licenses/LICENSE,sha256=xx0jnfkXJvxRnG63LTGOxlggYnIysveWIZ6H3PNdCrQ,11357
+ together-1.5.34.dist-info/RECORD,,