PyPI - together - Versions diffs - 1.2.11__py3-none-any.whl → 2.0.0a8__py3-none-any.whl - Mend

together 1.2.11__py3-none-any.whl → 2.0.0a8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (201) hide show

together/__init__.py +101 -63
together/_base_client.py +1995 -0
together/_client.py +1033 -0
together/_compat.py +219 -0
together/_constants.py +14 -0
together/_exceptions.py +108 -0
together/_files.py +123 -0
together/_models.py +857 -0
together/_qs.py +150 -0
together/_resource.py +43 -0
together/_response.py +830 -0
together/_streaming.py +370 -0
together/_types.py +260 -0
together/_utils/__init__.py +64 -0
together/_utils/_compat.py +45 -0
together/_utils/_datetime_parse.py +136 -0
together/_utils/_logs.py +25 -0
together/_utils/_proxy.py +65 -0
together/_utils/_reflection.py +42 -0
together/_utils/_resources_proxy.py +24 -0
together/_utils/_streams.py +12 -0
together/_utils/_sync.py +58 -0
together/_utils/_transform.py +457 -0
together/_utils/_typing.py +156 -0
together/_utils/_utils.py +421 -0
together/_version.py +4 -0
together/lib/.keep +4 -0
together/lib/__init__.py +23 -0
together/lib/cli/api/endpoints.py +467 -0
together/lib/cli/api/evals.py +588 -0
together/{cli → lib/cli}/api/files.py +20 -17
together/lib/cli/api/fine_tuning.py +566 -0
together/lib/cli/api/models.py +140 -0
together/lib/cli/api/utils.py +50 -0
together/{cli → lib/cli}/cli.py +17 -23
together/lib/constants.py +61 -0
together/lib/resources/__init__.py +11 -0
together/lib/resources/files.py +999 -0
together/lib/resources/fine_tuning.py +280 -0
together/lib/resources/models.py +35 -0
together/lib/types/__init__.py +13 -0
together/lib/types/error.py +9 -0
together/lib/types/fine_tuning.py +455 -0
together/{utils → lib/utils}/__init__.py +7 -10
together/{utils → lib/utils}/_log.py +18 -13
together/lib/utils/files.py +628 -0
together/lib/utils/serializer.py +10 -0
together/{utils → lib/utils}/tools.py +17 -2
together/resources/__init__.py +225 -24
together/resources/audio/__init__.py +75 -0
together/resources/audio/audio.py +198 -0
together/resources/audio/speech.py +605 -0
together/resources/audio/transcriptions.py +282 -0
together/resources/audio/translations.py +256 -0
together/resources/audio/voices.py +135 -0
together/resources/batches.py +417 -0
together/resources/chat/__init__.py +30 -21
together/resources/chat/chat.py +102 -0
together/resources/chat/completions.py +1063 -257
together/resources/code_interpreter/__init__.py +33 -0
together/resources/code_interpreter/code_interpreter.py +258 -0
together/resources/code_interpreter/sessions.py +135 -0
together/resources/completions.py +890 -225
together/resources/embeddings.py +172 -68
together/resources/endpoints.py +711 -0
together/resources/evals.py +452 -0
together/resources/files.py +397 -120
together/resources/fine_tuning.py +1033 -0
together/resources/hardware.py +181 -0
together/resources/images.py +256 -108
together/resources/jobs.py +214 -0
together/resources/models.py +251 -44
together/resources/rerank.py +190 -92
together/resources/videos.py +374 -0
together/types/__init__.py +66 -73
together/types/audio/__init__.py +10 -0
together/types/audio/speech_create_params.py +75 -0
together/types/audio/transcription_create_params.py +54 -0
together/types/audio/transcription_create_response.py +111 -0
together/types/audio/translation_create_params.py +40 -0
together/types/audio/translation_create_response.py +70 -0
together/types/audio/voice_list_response.py +23 -0
together/types/audio_speech_stream_chunk.py +16 -0
together/types/autoscaling.py +13 -0
together/types/autoscaling_param.py +15 -0
together/types/batch_create_params.py +24 -0
together/types/batch_create_response.py +14 -0
together/types/batch_job.py +45 -0
together/types/batch_list_response.py +10 -0
together/types/chat/__init__.py +18 -0
together/types/chat/chat_completion.py +60 -0
together/types/chat/chat_completion_chunk.py +61 -0
together/types/chat/chat_completion_structured_message_image_url_param.py +18 -0
together/types/chat/chat_completion_structured_message_text_param.py +13 -0
together/types/chat/chat_completion_structured_message_video_url_param.py +18 -0
together/types/chat/chat_completion_usage.py +13 -0
together/types/chat/chat_completion_warning.py +9 -0
together/types/chat/completion_create_params.py +329 -0
together/types/code_interpreter/__init__.py +5 -0
together/types/code_interpreter/session_list_response.py +31 -0
together/types/code_interpreter_execute_params.py +45 -0
together/types/completion.py +42 -0
together/types/completion_chunk.py +66 -0
together/types/completion_create_params.py +138 -0
together/types/dedicated_endpoint.py +44 -0
together/types/embedding.py +24 -0
together/types/embedding_create_params.py +31 -0
together/types/endpoint_create_params.py +43 -0
together/types/endpoint_list_avzones_response.py +11 -0
together/types/endpoint_list_params.py +18 -0
together/types/endpoint_list_response.py +41 -0
together/types/endpoint_update_params.py +27 -0
together/types/eval_create_params.py +263 -0
together/types/eval_create_response.py +16 -0
together/types/eval_list_params.py +21 -0
together/types/eval_list_response.py +10 -0
together/types/eval_status_response.py +100 -0
together/types/evaluation_job.py +139 -0
together/types/execute_response.py +108 -0
together/types/file_delete_response.py +13 -0
together/types/file_list.py +12 -0
together/types/file_purpose.py +9 -0
together/types/file_response.py +31 -0
together/types/file_type.py +7 -0
together/types/fine_tuning_cancel_response.py +194 -0
together/types/fine_tuning_content_params.py +24 -0
together/types/fine_tuning_delete_params.py +11 -0
together/types/fine_tuning_delete_response.py +12 -0
together/types/fine_tuning_list_checkpoints_response.py +21 -0
together/types/fine_tuning_list_events_response.py +12 -0
together/types/fine_tuning_list_response.py +199 -0
together/types/finetune_event.py +41 -0
together/types/finetune_event_type.py +33 -0
together/types/finetune_response.py +177 -0
together/types/hardware_list_params.py +16 -0
together/types/hardware_list_response.py +58 -0
together/types/image_data_b64.py +15 -0
together/types/image_data_url.py +15 -0
together/types/image_file.py +23 -0
together/types/image_generate_params.py +85 -0
together/types/job_list_response.py +47 -0
together/types/job_retrieve_response.py +43 -0
together/types/log_probs.py +18 -0
together/types/model_list_response.py +10 -0
together/types/model_object.py +42 -0
together/types/model_upload_params.py +36 -0
together/types/model_upload_response.py +23 -0
together/types/rerank_create_params.py +36 -0
together/types/rerank_create_response.py +36 -0
together/types/tool_choice.py +23 -0
together/types/tool_choice_param.py +23 -0
together/types/tools_param.py +23 -0
together/types/training_method_dpo.py +22 -0
together/types/training_method_sft.py +18 -0
together/types/video_create_params.py +86 -0
together/types/video_job.py +57 -0
together-2.0.0a8.dist-info/METADATA +680 -0
together-2.0.0a8.dist-info/RECORD +164 -0
{together-1.2.11.dist-info → together-2.0.0a8.dist-info}/WHEEL +1 -1
together-2.0.0a8.dist-info/entry_points.txt +2 -0
{together-1.2.11.dist-info → together-2.0.0a8.dist-info/licenses}/LICENSE +1 -1
together/abstract/api_requestor.py +0 -723
together/cli/api/chat.py +0 -276
together/cli/api/completions.py +0 -119
together/cli/api/finetune.py +0 -272
together/cli/api/images.py +0 -82
together/cli/api/models.py +0 -42
together/client.py +0 -157
together/constants.py +0 -31
together/error.py +0 -191
together/filemanager.py +0 -388
together/legacy/__init__.py +0 -0
together/legacy/base.py +0 -27
together/legacy/complete.py +0 -93
together/legacy/embeddings.py +0 -27
together/legacy/files.py +0 -146
together/legacy/finetune.py +0 -177
together/legacy/images.py +0 -27
together/legacy/models.py +0 -44
together/resources/finetune.py +0 -489
together/together_response.py +0 -50
together/types/abstract.py +0 -26
together/types/chat_completions.py +0 -171
together/types/common.py +0 -65
together/types/completions.py +0 -104
together/types/embeddings.py +0 -35
together/types/error.py +0 -16
together/types/files.py +0 -89
together/types/finetune.py +0 -265
together/types/images.py +0 -42
together/types/models.py +0 -44
together/types/rerank.py +0 -43
together/utils/api_helpers.py +0 -84
together/utils/files.py +0 -204
together/version.py +0 -6
together-1.2.11.dist-info/METADATA +0 -408
together-1.2.11.dist-info/RECORD +0 -58
together-1.2.11.dist-info/entry_points.txt +0 -3
/together/{abstract → lib/cli}/__init__.py +0 -0
/together/{cli → lib/cli/api}/__init__.py +0 -0
/together/{cli/api/__init__.py → py.typed} +0 -0

together/types/finetune.py DELETED Viewed

@@ -1,265 +0,0 @@
-from __future__ import annotations
-from enum import Enum
-from typing import List, Literal
-from pydantic import Field, validator, field_validator
-from together.types.abstract import BaseModel
-from together.types.common import (
-    ObjectType,
-)
-class FinetuneJobStatus(str, Enum):
-    """
-    Possible fine-tune job status values.
-    Members also subclass ``str``, so each one compares equal to its raw string value.
-    """
-    # waiting / in-progress states
-    STATUS_PENDING = "pending"
-    STATUS_QUEUED = "queued"
-    STATUS_RUNNING = "running"
-    STATUS_COMPRESSING = "compressing"
-    STATUS_UPLOADING = "uploading"
-    # cancellation states
-    STATUS_CANCEL_REQUESTED = "cancel_requested"
-    STATUS_CANCELLED = "cancelled"
-    # failure states
-    STATUS_ERROR = "error"
-    STATUS_USER_ERROR = "user_error"
-    # success state
-    STATUS_COMPLETED = "completed"
-class FinetuneEventLevels(str, Enum):
-    """
-    Fine-tune job event log levels.
-    Both capitalized and lowercase ("legacy") spellings are distinct members,
-    because their string values differ in case.
-    """
-    # empty string level
-    NULL = ""
-    # capitalized spellings
-    INFO = "Info"
-    WARNING = "Warning"
-    ERROR = "Error"
-    # lowercase spellings
-    LEGACY_INFO = "info"
-    # NOTE(review): the two names below look like typos of LEGACY_WARNING / LEGACY_ERROR;
-    # renaming them would change the public member names, so confirm before touching.
-    LEGACY_IWARNING = "warning"
-    LEGACY_IERROR = "error"
-class FinetuneEventType(str, Enum):
-    """
-    Fine-tune job event types.
-    Every member's value equals its name, except MODEL_COMPRESSING (see below).
-    """
-    JOB_PENDING = "JOB_PENDING"
-    JOB_START = "JOB_START"
-    JOB_STOPPED = "JOB_STOPPED"
-    MODEL_DOWNLOADING = "MODEL_DOWNLOADING"
-    MODEL_DOWNLOAD_COMPLETE = "MODEL_DOWNLOAD_COMPLETE"
-    TRAINING_DATA_DOWNLOADING = "TRAINING_DATA_DOWNLOADING"
-    TRAINING_DATA_DOWNLOAD_COMPLETE = "TRAINING_DATA_DOWNLOAD_COMPLETE"
-    VALIDATION_DATA_DOWNLOADING = "VALIDATION_DATA_DOWNLOADING"
-    VALIDATION_DATA_DOWNLOAD_COMPLETE = "VALIDATION_DATA_DOWNLOAD_COMPLETE"
-    WANDB_INIT = "WANDB_INIT"
-    TRAINING_START = "TRAINING_START"
-    CHECKPOINT_SAVE = "CHECKPOINT_SAVE"
-    BILLING_LIMIT = "BILLING_LIMIT"
-    EPOCH_COMPLETE = "EPOCH_COMPLETE"
-    EVAL_COMPLETE = "EVAL_COMPLETE"
-    TRAINING_COMPLETE = "TRAINING_COMPLETE"
-    # the wire value is "COMPRESSING_MODEL", not "MODEL_COMPRESSING"
-    MODEL_COMPRESSING = "COMPRESSING_MODEL"
-    MODEL_COMPRESSION_COMPLETE = "MODEL_COMPRESSION_COMPLETE"
-    MODEL_UPLOADING = "MODEL_UPLOADING"
-    MODEL_UPLOAD_COMPLETE = "MODEL_UPLOAD_COMPLETE"
-    JOB_COMPLETE = "JOB_COMPLETE"
-    JOB_ERROR = "JOB_ERROR"
-    JOB_USER_ERROR = "JOB_USER_ERROR"
-    CANCEL_REQUESTED = "CANCEL_REQUESTED"
-    JOB_RESTARTED = "JOB_RESTARTED"
-    REFUND = "REFUND"
-    WARNING = "WARNING"
-class DownloadCheckpointType(Enum):
-    """
-    Kinds of checkpoint that can be requested for download.
-    Unlike the other enums in this module this is a plain ``Enum`` (no ``str`` mixin),
-    so members do not compare equal to their string values; use ``.value``.
-    """
-    DEFAULT = "default"
-    MERGED = "merged"
-    ADAPTER = "adapter"
-class FinetuneEvent(BaseModel):
-    """
-    A single fine-tune job event.
-    Only ``object`` is required; every other field defaults to None.
-    """
-    # object type discriminator (must be ObjectType.FinetuneEvent)
-    object: Literal[ObjectType.FinetuneEvent]
-    # created at datetime stamp (kept as a string, not parsed)
-    created_at: str | None = None
-    # event log level
-    level: FinetuneEventLevels | None = None
-    # event message string
-    message: str | None = None
-    # event type
-    type: FinetuneEventType | None = None
-    # optional: model parameter count
-    param_count: int | None = None
-    # optional: dataset token count
-    token_count: int | None = None
-    # optional: weights & biases url
-    wandb_url: str | None = None
-    # event hash
-    hash: str | None = None
-class TrainingType(BaseModel):
-    """
-    Base training type; subclasses fix ``type`` to a concrete default value.
-    """
-    # training type name (required here, defaulted by subclasses)
-    type: str
-class FullTrainingType(TrainingType):
-    """
-    Training type for full fine-tuning
-    """
-    # training type name; defaults to "Full"
-    type: str = "Full"
-class LoRATrainingType(TrainingType):
-    """
-    Training type for LoRA adapters training.
-    ``lora_r`` and ``lora_alpha`` are required; the remaining fields have defaults.
-    """
-    # LoRA rank parameter (required)
-    lora_r: int
-    # LoRA alpha parameter (required)
-    lora_alpha: int
-    # LoRA dropout; defaults to 0.0
-    lora_dropout: float = 0.0
-    # which modules get adapters; defaults to "all-linear"
-    lora_trainable_modules: str = "all-linear"
-    # training type name; defaults to "Lora"
-    type: str = "Lora"
-class FinetuneRequest(BaseModel):
-    """
-    Fine-tune request type.
-    ``training_file``, ``model``, ``n_epochs`` and ``learning_rate`` are required;
-    all other fields default to None.
-    """
-    # training file ID
-    training_file: str
-    # validation file id
-    validation_file: str | None = None
-    # base model string
-    model: str
-    # number of epochs to train for
-    n_epochs: int
-    # training learning rate
-    learning_rate: float
-    # number of checkpoints to save
-    n_checkpoints: int | None = None
-    # number of evaluation loops to run
-    n_evals: int | None = None
-    # training batch size
-    batch_size: int | None = None
-    # up to 40 character suffix for output model name
-    suffix: str | None = None
-    # weights & biases api key
-    wandb_key: str | None = None
-    # training type: full fine-tuning or LoRA
-    training_type: FullTrainingType | LoRATrainingType | None = None
-class FinetuneResponse(BaseModel):
-    """
-    Fine-tune API response type
-    """
-    # job ID
-    id: str | None = None
-    # training file id
-    training_file: str | None = None
-    # validation file id
-    validation_file: str | None = None
-    # base model name
-    model: str | None = None
-    # output model name
-    output_name: str | None = Field(None, alias="model_output_name")
-    # adapter output name
-    adapter_output_name: str | None = None
-    # number of epochs
-    n_epochs: int | None = None
-    # number of checkpoints to save
-    n_checkpoints: int | None = None
-    # number of evaluation loops
-    n_evals: int | None = None
-    # training batch size
-    batch_size: int | None = None
-    # training learning rate
-    learning_rate: float | None = None
-    # number of steps between evals
-    eval_steps: int | None = None
-    # training type
-    training_type: TrainingType | None = None
-    # created/updated datetime stamps
-    created_at: str | None = None
-    updated_at: str | None = None
-    # job status
-    status: FinetuneJobStatus | None = None
-    # job id
-    job_id: str | None = None
-    # list of fine-tune events
-    events: List[FinetuneEvent] | None = None
-    # dataset token count
-    token_count: int | None = None
-    # model parameter count
-    param_count: int | None = None
-    # fine-tune job price
-    total_price: int | None = None
-    # total number of training steps
-    total_steps: int | None = None
-    # number of steps completed (incrementing counter)
-    steps_completed: int | None = None
-    # number of epochs completed (incrementing counter)
-    epochs_completed: int | None = None
-    # number of evaluation loops completed (incrementing counter)
-    evals_completed: int | None = None
-    # place in job queue (decrementing counter)
-    queue_depth: int | None = None
-    # weights & biases project name
-    wandb_project_name: str | None = None
-    # weights & biases job url
-    wandb_url: str | None = None
-    # training file metadata
-    training_file_num_lines: int | None = Field(None, alias="TrainingFileNumLines")
-    training_file_size: int | None = Field(None, alias="TrainingFileSize")
-    @field_validator("training_type")
-    @classmethod
-    def validate_training_type(cls, v: TrainingType) -> TrainingType:
-        if v.type == "Full" or v.type == "":
-            return FullTrainingType(**v.model_dump())
-        elif v.type == "Lora":
-            return LoRATrainingType(**v.model_dump())
-        else:
-            raise ValueError("Unknown training type")
-class FinetuneList(BaseModel):
-    # Container for a list of fine-tune jobs; both fields default to None.
-    # object type (only "list" is accepted)
-    object: Literal["list"] | None = None
-    # list of fine-tune job objects
-    data: List[FinetuneResponse] | None = None
-class FinetuneListEvents(BaseModel):
-    # Container for a list of fine-tune events; both fields default to None.
-    # object type (only "list" is accepted)
-    object: Literal["list"] | None = None
-    # list of fine-tune events
-    data: List[FinetuneEvent] | None = None
-class FinetuneDownloadResult(BaseModel):
-    # Describes a downloaded fine-tune artifact; every field defaults to None.
-    # object type (only "local" is accepted)
-    object: Literal["local"] | None = None
-    # fine-tune job id
-    id: str | None = None
-    # checkpoint step number
-    checkpoint_step: int | None = None
-    # local path filename
-    filename: str | None = None
-    # size in bytes
-    size: int | None = None

together/types/images.py DELETED Viewed

@@ -1,42 +0,0 @@
-from __future__ import annotations
-from typing import List, Literal
-from together.types.abstract import BaseModel
-class ImageRequest(BaseModel):
-    # Image generation request; ``prompt`` and ``model`` are required.
-    # text prompt (a single string)
-    prompt: str
-    # model to query
-    model: str
-    # num generation steps; defaults to 20
-    steps: int | None = 20
-    # seed
-    seed: int | None = None
-    # number of results to return; defaults to 1
-    n: int | None = 1
-    # pixel height; defaults to 1024
-    height: int | None = 1024
-    # pixel width; defaults to 1024
-    width: int | None = 1024
-    # negative prompt
-    negative_prompt: str | None = None
-class ImageChoicesData(BaseModel):
-    # One generated image; both fields are required.
-    # response index
-    index: int
-    # base64 image response
-    b64_json: str
-class ImageResponse(BaseModel):
-    # Image generation response; every field defaults to None.
-    # job id
-    id: str | None = None
-    # query model
-    model: str | None = None
-    # object type (only "list" is accepted)
-    object: Literal["list"] | None = None
-    # list of generated image choices
-    data: List[ImageChoicesData] | None = None

together/types/models.py DELETED Viewed

@@ -1,44 +0,0 @@
-from __future__ import annotations
-from enum import Enum
-from typing import Literal
-from together.types.abstract import BaseModel
-from together.types.common import ObjectType
-class ModelType(str, Enum):
-    # Model categories; members also subclass ``str`` and compare equal to their values.
-    CHAT = "chat"
-    LANGUAGE = "language"
-    CODE = "code"
-    IMAGE = "image"
-    EMBEDDING = "embedding"
-    MODERATION = "moderation"
-    RERANK = "rerank"
-class PricingObject(BaseModel):
-    # Per-model pricing figures; every field defaults to None.
-    # NOTE(review): units and currency are not stated in this module; confirm against the API docs.
-    input: float | None = None
-    output: float | None = None
-    hourly: float | None = None
-    base: float | None = None
-    finetune: float | None = None
-class ModelObject(BaseModel):
-    # Description of one model; ``id``, ``object`` and ``pricing`` are required.
-    # model id
-    id: str
-    # object type discriminator (must be ObjectType.Model)
-    object: Literal[ObjectType.Model]
-    # creation time as an integer (presumably a Unix timestamp; TODO confirm)
-    created: int | None = None
-    # model type
-    type: ModelType | None = None
-    # pretty name
-    display_name: str | None = None
-    # model creator organization
-    organization: str | None = None
-    # link to model resource
-    link: str | None = None
-    # license string
-    license: str | None = None
-    # context length (presumably in tokens; TODO confirm)
-    context_length: int | None = None
-    # pricing details (required, no default)
-    pricing: PricingObject

together/types/rerank.py DELETED Viewed

@@ -1,43 +0,0 @@
-from __future__ import annotations
-from typing import List, Literal, Dict, Any
-from together.types.abstract import BaseModel
-from together.types.common import UsageData
-class RerankRequest(BaseModel):
-    # Rerank request; ``model``, ``query`` and ``documents`` are required.
-    # model to query
-    model: str
-    # query string (a single string)
-    query: str
-    # list of documents: either all strings or all dicts
-    documents: List[str] | List[Dict[str, Any]]
-    # return top_n results
-    top_n: int | None = None
-    # boolean to return documents; defaults to False
-    return_documents: bool = False
-    # field selector for documents
-    rank_fields: List[str] | None = None
-class RerankChoicesData(BaseModel):
-    # One reranked result; ``index`` and ``relevance_score`` are required.
-    # response index
-    index: int
-    # relevance score
-    relevance_score: float
-    # document payload (optional; defaults to None)
-    document: Dict[str, Any] | None = None
-class RerankResponse(BaseModel):
-    # Rerank response; every field defaults to None.
-    # job id
-    id: str | None = None
-    # object type (only "rerank" is accepted)
-    object: Literal["rerank"] | None = None
-    # query model
-    model: str | None = None
-    # list of reranked results
-    results: List[RerankChoicesData] | None = None
-    # usage stats
-    usage: UsageData | None = None

together/utils/api_helpers.py DELETED Viewed

@@ -1,84 +0,0 @@
-from __future__ import annotations
-import json
-import os
-import platform
-from typing import TYPE_CHECKING, Any, Dict
-if TYPE_CHECKING:
-    from _typeshed import SupportsKeysAndGetItem
-import together
-from together import error
-from together.utils._log import _console_log_level
-def get_headers(
-    method: str | None = None,
-    api_key: str | None = None,
-    extra: "SupportsKeysAndGetItem[str, Any] | None" = None,
-) -> Dict[str, str]:
-    """
-    Generates request headers with API key, metadata, and supplied headers
-    Args:
-        method (str, optional): HTTP request type (POST, GET, etc.)
-            Defaults to None.
-        api_key (str, optional): API key to add as an Authorization header.
-            Defaults to None.
-        extra (SupportsKeysAndGetItem[str, Any], optional): Additional headers to add to request.
-            Defaults to None.
-    Returns:
-        headers (Dict[str, str]): Compiled headers from data
-    """
-    user_agent = "Together/v1 PythonBindings/%s" % (together.version,)
-    uname_without_node = " ".join(
-        v for k, v in platform.uname()._asdict().items() if k != "node"
-    )
-    ua = {
-        "bindings_version": together.version,
-        "httplib": "requests",
-        "lang": "python",
-        "lang_version": platform.python_version(),
-        "platform": platform.platform(),
-        "publisher": "together",
-        "uname": uname_without_node,
-    }
-    headers: Dict[str, Any] = {
-        "X-Together-Client-User-Agent": json.dumps(ua),
-        "Authorization": f"Bearer {default_api_key(api_key)}",
-        "User-Agent": user_agent,
-    }
-    if _console_log_level():
-        headers["Together-Debug"] = _console_log_level()
-    if extra:
-        headers.update(extra)
-    return headers
-def default_api_key(api_key: str | None = None) -> str | None:
-    """
-    API key fallback logic from input argument and environment variable
-    Args:
-        api_key (str, optional): Supplied API key. This argument takes priority over env var
-    Returns:
-        together_api_key (str): Returns API key from supplied input or env var
-    Raises:
-        together.error.AuthenticationError: if API key not found
-    """
-    if api_key:
-        return api_key
-    if os.environ.get("TOGETHER_API_KEY"):
-        return os.environ.get("TOGETHER_API_KEY")
-    raise error.AuthenticationError(together.constants.MISSING_API_KEY_MESSAGE)

together/utils/files.py DELETED Viewed

@@ -1,204 +0,0 @@
-from __future__ import annotations
-import json
-import os
-from pathlib import Path
-from traceback import format_exc
-from typing import Any, Dict
-from pyarrow import ArrowInvalid, parquet
-from together.constants import (
-    MAX_FILE_SIZE_GB,
-    MIN_SAMPLES,
-    NUM_BYTES_IN_GB,
-    PARQUET_EXPECTED_COLUMNS,
-)
-def check_file(
-    file: Path | str,
-) -> Dict[str, Any]:
-    if not isinstance(file, Path):
-        file = Path(file)
-    report_dict = {
-        "is_check_passed": True,
-        "message": "Checks passed",
-        "found": None,
-        "file_size": None,
-        "utf8": None,
-        "line_type": None,
-        "text_field": None,
-        "key_value": None,
-        "min_samples": None,
-        "num_samples": None,
-        "load_json": None,
-    }
-    if not file.is_file():
-        report_dict["found"] = False
-        report_dict["is_check_passed"] = False
-        return report_dict
-    else:
-        report_dict["found"] = True
-    file_size = os.stat(file.as_posix()).st_size
-    if file_size > MAX_FILE_SIZE_GB * NUM_BYTES_IN_GB:
-        report_dict["message"] = (
-            f"Maximum supported file size is {MAX_FILE_SIZE_GB} GB. Found file with size of {round(file_size / NUM_BYTES_IN_GB ,3)} GB."
-        )
-        report_dict["is_check_passed"] = False
-    elif file_size == 0:
-        report_dict["message"] = "File is empty"
-        report_dict["file_size"] = 0
-        report_dict["is_check_passed"] = False
-        return report_dict
-    else:
-        report_dict["file_size"] = file_size
-    if file.suffix == ".jsonl":
-        report_dict["filetype"] = "jsonl"
-        data_report_dict = _check_jsonl(file)
-    elif file.suffix == ".parquet":
-        report_dict["filetype"] = "parquet"
-        data_report_dict = _check_parquet(file)
-    else:
-        report_dict["filetype"] = (
-            f"Unknown extension of file {file}. "
-            "Only files with extensions .jsonl and .parquet are supported."
-        )
-        report_dict["is_check_passed"] = False
-    report_dict.update(data_report_dict)
-    return report_dict
-def _check_jsonl(file: Path) -> Dict[str, Any]:
-    report_dict: Dict[str, Any] = {}
-    # Check that the file is UTF-8 encoded. If not report where the error occurs.
-    try:
-        with file.open(encoding="utf-8") as f:
-            f.read()
-        report_dict["utf8"] = True
-    except UnicodeDecodeError as e:
-        report_dict["utf8"] = False
-        report_dict["message"] = f"File is not UTF-8 encoded. Error raised: {e}."
-        report_dict["is_check_passed"] = False
-        return report_dict
-    with file.open() as f:
-        # idx must be instantiated so decode errors (e.g. file is a tar) or empty files are caught
-        idx = -1
-        try:
-            for idx, line in enumerate(f):
-                json_line = json.loads(line)  # each line in jsonlines should be a json
-                if not isinstance(json_line, dict):
-                    report_dict["line_type"] = False
-                    report_dict["message"] = (
-                        f"Error parsing file. Invalid format on line {idx + 1} of the input file. "
-                        'Example of valid json: {"text": "my sample string"}. '
-                    )
-                    report_dict["is_check_passed"] = False
-                if "text" not in json_line.keys():
-                    report_dict["text_field"] = False
-                    report_dict["message"] = (
-                        f"Missing 'text' field was found on line {idx + 1} of the the input file. "
-                        "Expected format: {'text': 'my sample string'}. "
-                    )
-                    report_dict["is_check_passed"] = False
-                else:
-                    # check to make sure the value of the "text" key is a string
-                    if not isinstance(json_line["text"], str):
-                        report_dict["key_value"] = False
-                        report_dict["message"] = (
-                            f'Invalid value type for "text" key on line {idx + 1}. '
-                            f'Expected string. Found {type(json_line["text"])}.'
-                        )
-                        report_dict["is_check_passed"] = False
-            # make sure this is outside the for idx, line in enumerate(f): for loop
-            if idx + 1 < MIN_SAMPLES:
-                report_dict["min_samples"] = False
-                report_dict["message"] = (
-                    f"Processing {file} resulted in only {idx + 1} samples. "
-                    f"Our minimum is {MIN_SAMPLES} samples. "
-                )
-                report_dict["is_check_passed"] = False
-            else:
-                report_dict["num_samples"] = idx + 1
-                report_dict["min_samples"] = True
-            report_dict["load_json"] = True
-        except ValueError:
-            report_dict["load_json"] = False
-            if idx < 0:
-                report_dict["message"] = (
-                    "Unable to decode file. "
-                    "File may be empty or in an unsupported format. "
-                )
-            else:
-                report_dict["message"] = (
-                    f"Error parsing json payload. Unexpected format on line {idx + 1}."
-                )
-            report_dict["is_check_passed"] = False
-    if "text_field" not in report_dict:
-        report_dict["text_field"] = True
-    if "line_type" not in report_dict:
-        report_dict["line_type"] = True
-    if "key_value" not in report_dict:
-        report_dict["key_value"] = True
-    return report_dict
-def _check_parquet(file: Path) -> Dict[str, Any]:
-    report_dict: Dict[str, Any] = {}
-    try:
-        table = parquet.read_table(str(file), memory_map=True)
-    except ArrowInvalid:
-        report_dict["load_parquet"] = (
-            f"An exception has occurred when loading the Parquet file {file}. Please check the file for corruption. "
-            f"Exception trace:\n{format_exc()}"
-        )
-        report_dict["is_check_passed"] = False
-        return report_dict
-    column_names = table.schema.names
-    if "input_ids" not in column_names:
-        report_dict["load_parquet"] = (
-            f"Parquet file {file} does not contain the `input_ids` column."
-        )
-        report_dict["is_check_passed"] = False
-        return report_dict
-    for column_name in column_names:
-        if column_name not in PARQUET_EXPECTED_COLUMNS:
-            report_dict["load_parquet"] = (
-                f"Parquet file {file} contains an unexpected column {column_name}. "
-                f"Only columns {PARQUET_EXPECTED_COLUMNS} are supported."
-            )
-            report_dict["is_check_passed"] = False
-            return report_dict
-    num_samples = len(table)
-    if num_samples < MIN_SAMPLES:
-        report_dict["min_samples"] = (
-            f"Processing {file} resulted in only {num_samples} samples. "
-            f"Our minimum is {MIN_SAMPLES} samples. "
-        )
-        report_dict["is_check_passed"] = False
-        return report_dict
-    else:
-        report_dict["num_samples"] = num_samples
-    report_dict["is_check_passed"] = True
-    return report_dict

together/version.py DELETED Viewed

@@ -1,6 +0,0 @@
-import importlib.metadata
-VERSION = importlib.metadata.version(
-    "together"
-)  # version of the installed "together" distribution, read from its package metadata

together 1.2.11__py3-none-any.whl → 2.0.0a8__py3-none-any.whl

together 1.2.11__py3-none-any.whl → 2.0.0a8__py3-none-any.whl