sutro 0.1.37__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sutro might be problematic. Click here for more details.

sutro/cli.py CHANGED
@@ -52,7 +52,7 @@ def set_human_readable_dates(datetime_columns, df):
52
52
  # Convert UTC string to local time string
53
53
  df = df.with_columns(
54
54
  pl.col(col)
55
- .str.to_datetime()
55
+ .str.to_datetime("%Y-%m-%dT%H:%M:%S%.f%Z")
56
56
  .map_elements(
57
57
  lambda dt: dt.replace(tzinfo=timezone.utc)
58
58
  .astimezone()
sutro/common.py ADDED
@@ -0,0 +1,220 @@
1
+ import os
2
+ from typing import Union, List, Literal, Dict, Any, Type, Optional
3
+
4
+ import pandas as pd
5
+ import polars as pl
6
+ from colorama import Fore, Style
7
+ from pydantic import BaseModel
8
+ from tqdm import tqdm
9
+
10
# Models that produce embeddings rather than text completions.
EmbeddingModelOptions = Literal[
    "qwen-3-embedding-0.6b", "qwen-3-embedding-6b", "qwen-3-embedding-8b"
]
15
+
16
# Models available for inference. Keep in sync with the backend configuration
# so users get helpful autocompletion when selecting a model.
# Fix: "llama-3.3-70b" was listed twice; the duplicate entry is removed.
ModelOptions = Literal[
    "llama-3.2-3b",
    "llama-3.1-8b",
    "llama-3.3-70b",
    "qwen-3-4b",
    "qwen-3-14b",
    "qwen-3-32b",
    "qwen-3-30b-a3b",
    "qwen-3-235b-a22b",
    "qwen-3-4b-thinking",
    "qwen-3-14b-thinking",
    "qwen-3-32b-thinking",
    "qwen-3-235b-a22b-thinking",
    "qwen-3-30b-a3b-thinking",
    "gemma-3-4b-it",
    "gemma-3-12b-it",
    "gemma-3-27b-it",
    "gpt-oss-20b",
    "gpt-oss-120b",
    "qwen-3-embedding-0.6b",
    "qwen-3-embedding-6b",
    "qwen-3-embedding-8b",
]
42
+
43
+
44
def do_dataframe_column_concatenation(
    data: Union[pd.DataFrame, pl.DataFrame], column: Union[str, List[str]]
):
    """
    Intelligently concatenate DataFrame columns into a single list of strings.

    Each entry in ``column`` that matches a column name contributes that
    column's values (cast to string, nulls replaced with ""); any other
    entry is treated as a literal separator string inserted in order.

    Args:
        data: A pandas or polars DataFrame.
        column: Column names and/or literal separator strings, in order.

    Returns:
        A list of concatenated strings, or None if ``data`` is neither a
        pandas nor a polars DataFrame.

    Raises:
        ValueError: If concatenation fails (e.g. ``column`` is empty).
    """
    try:
        if isinstance(data, pd.DataFrame):
            series_parts = []
            for p in column:
                if p in data.columns:
                    s = data[p].astype("string").fillna("")
                else:
                    # Treat as a literal separator
                    s = pd.Series([p] * len(data), index=data.index, dtype="string")
                series_parts.append(s)

            out = series_parts[0]
            for s in series_parts[1:]:
                out = out.str.cat(s, na_rep="")

            return out.tolist()
        elif isinstance(data, pl.DataFrame):
            exprs = []
            for p in column:
                if p in data.columns:
                    exprs.append(pl.col(p).cast(pl.Utf8).fill_null(""))
                else:
                    exprs.append(pl.lit(p))

            result = data.select(
                pl.concat_str(exprs, separator="", ignore_nulls=False).alias("concat")
            )
            return result["concat"].to_list()
        # Unsupported input type: mirror the original contract and return None.
        return None
    except Exception as e:
        # Typo fix: "concatentation" -> "concatenation" in the error message.
        raise ValueError(f"Error handling column concatenation: {e}")
81
+
82
+
83
def handle_data_helper(
    data: Union[List, pd.DataFrame, pl.DataFrame, str],
    column: Union[str, List[str], None] = None,
):
    """
    Normalize the supported input forms into a list of values (or a
    dataset reference string) ready to submit for inference.

    Args:
        data: A list of inputs, a pandas/polars DataFrame, a dataset ID
            (a string starting with "dataset-"), or a path to a
            .csv/.parquet/.txt file (or an extensionless text file).
        column: Column name — or a list of column names and literal
            separators — to read when ``data`` is a DataFrame, CSV, or
            Parquet input. Required for those input types.

    Returns:
        A list of input values, or a "dataset-...:column" reference string.

    Raises:
        ValueError: If a required column is missing, the column argument
            has an unsupported type, the file type is unsupported, or
            ``data`` itself has an unsupported type.
    """
    if isinstance(data, list):
        input_data = data
    elif isinstance(data, (pd.DataFrame, pl.DataFrame)):
        if column is None:
            raise ValueError("Column name must be specified for DataFrame input")
        if isinstance(column, list):
            input_data = do_dataframe_column_concatenation(data, column)
        elif isinstance(column, str):
            input_data = data[column].to_list()
        else:
            # Previously this fell through and raised UnboundLocalError.
            raise ValueError("Column must be a string or a list of strings")
    elif isinstance(data, str):
        if data.startswith("dataset-"):
            if column is None:
                # Previously this raised a confusing TypeError on concatenation.
                raise ValueError("Column name must be specified for dataset input")
            input_data = data + ":" + column
        else:
            file_ext = os.path.splitext(data)[1].lower()
            if file_ext == ".csv":
                df = pl.read_csv(data)
            elif file_ext == ".parquet":
                df = pl.read_parquet(data)
            elif file_ext in [".txt", ""]:
                with open(data, "r") as file:
                    input_data = [line.strip() for line in file]
            else:
                raise ValueError(f"Unsupported file type: {file_ext}")

            if file_ext in [".csv", ".parquet"]:
                if column is None:
                    raise ValueError(
                        "Column name must be specified for CSV/Parquet input"
                    )
                input_data = df[column].to_list()
    else:
        raise ValueError(
            "Unsupported data type. Please provide a list, DataFrame, or file path."
        )

    return input_data
122
+
123
+
124
def normalize_output_schema(
    output_schema: Union[Dict[str, Any], Type[BaseModel], None],
):
    """Return ``output_schema`` normalized to a plain dict.

    A pydantic model class is converted via its ``model_json_schema()``
    method; a dict is returned unchanged; anything else is rejected.
    """
    to_schema = getattr(output_schema, "model_json_schema", None)
    if to_schema is not None:
        return to_schema()
    if isinstance(output_schema, dict):
        return output_schema
    raise ValueError(
        "Invalid output schema type. Must be a dictionary or a pydantic Model."
    )
136
+
137
+
138
def to_colored_text(
    text: str, state: Optional[Literal["success", "fail", "callout"]] = None
) -> str:
    """
    Wrap text in an ANSI color escape chosen by state.

    Args:
        text (str): The text to color
        state: 'success' colors green, 'fail' red, 'callout' magenta;
            any other value (including None) defaults to blue.

    Returns:
        str: Text with appropriate color applied
    """
    # Blue is the default for normal/processing states.
    color = {
        "success": Fore.GREEN,
        "fail": Fore.RED,
        "callout": Fore.MAGENTA,
    }.get(state, Fore.BLUE)
    return f"{color}{text}{Style.RESET_ALL}"
162
+
163
+
164
def fancy_tqdm(
    total: int,
    desc: str = "Progress",
    color: str = "blue",
    style: int = 1,
    postfix: Optional[str] = None,
) -> tqdm:
    """
    Creates a customized tqdm progress bar with different styling options.

    Args:
        total (int): Total iterations
        desc (str): Description for the progress bar
        color (str): Color of the progress bar (green, blue, red, yellow, magenta)
        style (int): Style preset (1-5); any unknown value falls back to preset 1
        postfix (str): Postfix for the progress bar

    Returns:
        tqdm: A configured progress bar instance (caller must update/close it).
    """

    # Style presets: each pairs a bar_format template with the ascii
    # characters tqdm uses to draw partial/full bar segments.
    style_presets = {
        1: {
            "bar_format": "{l_bar}{bar:30}| {n_fmt}/{total_fmt} | {percentage:3.0f}% {postfix}",
            "ascii": "░▒█",
        },
        2: {
            "bar_format": "╢{l_bar}{bar:30}╟ {percentage:3.0f}%",
            "ascii": "▁▂▃▄▅▆▇█",
        },
        3: {
            "bar_format": "{desc}: |{bar}| {percentage:3.0f}% [{elapsed}<{remaining}]",
            "ascii": "◯◔◑◕●",
        },
        4: {
            "bar_format": "⏳ {desc} {percentage:3.0f}% |{bar}| {n_fmt}/{total_fmt}",
            "ascii": "⬜⬛",
        },
        5: {
            "bar_format": "⏳ {desc} {percentage:3.0f}% |{bar}| {n_fmt}/{total_fmt}",
            "ascii": "▏▎▍▌▋▊▉█",
        },
    }

    # Get style configuration (preset 1 when the requested style is unknown)
    style_config = style_presets.get(style, style_presets[1])

    return tqdm(
        total=total,
        desc=desc,
        colour=color,
        bar_format=style_config["bar_format"],
        ascii=style_config["ascii"],
        ncols=80,
        dynamic_ncols=True,
        smoothing=0.3,
        leave=True,
        postfix=postfix,
    )
sutro/interfaces.py ADDED
@@ -0,0 +1,90 @@
1
+ from enum import Enum
2
+
3
+ import pandas as pd
4
+ import polars as pl
5
+ from typing import Any, Dict, List, Optional, Union, Type
6
+ from pydantic import BaseModel
7
+
8
+ from sutro.common import ModelOptions
9
+
10
+
11
class BaseSutroClient:
    """
    Base class declaring attributes and method interfaces for template function mixins
    to use.

    Methods here are interface stubs (bodies are intentionally ``...``);
    the concrete client class is expected to provide the implementations.
    """

    # Core inference method interface
    def infer(
        self,
        data: Union[List, pd.DataFrame, pl.DataFrame, str],
        model: Union[ModelOptions, List[ModelOptions]] = "gemma-3-12b-it",
        name: Union[str, List[str]] = None,
        description: Union[str, List[str]] = None,
        column: Union[str, List[str]] = None,
        output_column: str = "inference_result",
        job_priority: int = 0,
        output_schema: Union[Dict[str, Any], Type[BaseModel]] = None,
        sampling_params: dict = None,
        system_prompt: str = None,
        dry_run: bool = False,
        stay_attached: Optional[bool] = None,
        random_seed_per_input: bool = False,
        truncate_rows: bool = True,
    ) -> Any:
        """
        Run inference on a dataset.

        Args:
            data: Input data (list, DataFrame, or dataset ID)
            model: Model(s) to use for inference
            name: Job name(s)
            description: Job description(s)
            column: Column(s) to process
            output_column: Name for output column
            job_priority: Job priority (0-10, higher = more priority)
            output_schema: Pydantic model or JSON schema for structured output
            sampling_params: Model sampling parameters
            system_prompt: System prompt for the model
            dry_run: If True, validate without running
            stay_attached: Wait for job completion
            random_seed_per_input: Use random seed per input
            truncate_rows: Truncate long inputs

        Returns:
            Job result or job ID
        """
        ...

    def await_job_completion(
        self,
        job_id: str,
        timeout: Optional[int] = 7200,
        obtain_results: bool = True,
        is_cost_estimate: bool = False,
    ) -> pl.DataFrame | None:
        """
        Block until the given job finishes (interface stub).

        Args:
            job_id: ID of the job to wait on.
            timeout: Maximum seconds to wait (default 7200, i.e. 2 hours).
            obtain_results: If True, fetch and return the job's results.
            is_cost_estimate: NOTE(review): presumably marks the awaited job
                as a dry-run/cost-estimate job — confirm against the
                concrete implementation.

        Returns:
            Results as a polars DataFrame when available, else None.
        """
        ...
66
+
67
+
68
class JobStatus(str, Enum):
    """Job statuses that will be returned by the API & SDK"""

    UNKNOWN = "UNKNOWN"
    # Waiting to start
    QUEUED = "QUEUED"
    # In the process of starting up
    STARTING = "STARTING"
    # Actively running
    RUNNING = "RUNNING"
    # Completed successfully
    SUCCEEDED = "SUCCEEDED"
    # In the process of being canceled
    CANCELLING = "CANCELLING"
    # Canceled by the user
    CANCELLED = "CANCELLED"
    # Failed
    FAILED = "FAILED"

    @classmethod
    def terminal_statuses(cls) -> list["JobStatus"]:
        """Statuses after which the job will make no further progress."""
        return [cls.SUCCEEDED, cls.FAILED, cls.CANCELLING, cls.CANCELLED]

    def is_terminal(self) -> bool:
        """Return True when this status is one of the terminal statuses."""
        return self in type(self).terminal_statuses()