PyPI - sutro - Versions diffs - 0.1.35__tar.gz → 0.1.37__tar.gz - Mend

sutro 0.1.35.tar.gz → 0.1.37.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8)

{sutro-0.1.35 → sutro-0.1.37}/PKG-INFO +14 -15
{sutro-0.1.35 → sutro-0.1.37}/pyproject.toml +8 -10
{sutro-0.1.35 → sutro-0.1.37}/sutro/sdk.py +161 -27
sutro-0.1.35/.gitignore +0 -4
{sutro-0.1.35 → sutro-0.1.37}/LICENSE +0 -0
{sutro-0.1.35 → sutro-0.1.37}/README.md +0 -0
{sutro-0.1.35 → sutro-0.1.37}/sutro/__init__.py +0 -0
{sutro-0.1.35 → sutro-0.1.37}/sutro/cli.py +0 -0

{sutro-0.1.35 → sutro-0.1.37}/PKG-INFO RENAMED Viewed

@@ -1,24 +1,23 @@
 Metadata-Version: 2.4
 Name: sutro
-Version: 0.1.35
+Version: 0.1.37
 Summary: Sutro Python SDK
-Project-URL: Homepage, https://sutro.sh
-Project-URL: Documentation, https://docs.sutro.sh
 License-Expression: Apache-2.0
-License-File: LICENSE
+Requires-Dist: numpy>=2.1.1,<3.0.0
+Requires-Dist: requests>=2.32.3,<3.0.0
+Requires-Dist: pandas>=2.2.3,<3.0.0
+Requires-Dist: polars>=1.33.0,<=1.34.0
+Requires-Dist: click>=8.1.7,<9.0.0
+Requires-Dist: colorama>=0.4.4,<1.0.0
+Requires-Dist: yaspin>=3.2.0,<4.0.0
+Requires-Dist: tqdm>=4.67.1,<5.0.0
+Requires-Dist: pydantic>=2.11.4,<3.0.0
+Requires-Dist: pyarrow>=21.0.0,<22.0.0
+Requires-Dist: ruff==0.13.1 ; extra == 'dev'
 Requires-Python: >=3.10
-Requires-Dist: click<9.0.0,>=8.1.7
-Requires-Dist: colorama<1.0.0,>=0.4.4
-Requires-Dist: numpy<3.0.0,>=2.1.1
-Requires-Dist: pandas<3.0.0,>=2.2.3
-Requires-Dist: polars<=1.8.2
-Requires-Dist: pyarrow<22.0.0,>=21.0.0
-Requires-Dist: pydantic<3.0.0,>=2.11.4
-Requires-Dist: requests<3.0.0,>=2.32.3
-Requires-Dist: tqdm<5.0.0,>=4.67.1
-Requires-Dist: yaspin<4.0.0,>=3.2.0
+Project-URL: Documentation, https://docs.sutro.sh
+Project-URL: Homepage, https://sutro.sh
 Provides-Extra: dev
-Requires-Dist: ruff==0.13.1; extra == 'dev'
 Description-Content-Type: text/markdown
 ![Sutro Logo](./assets/sutro-logo-dark.png)

{sutro-0.1.35 → sutro-0.1.37}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [build-system]
-requires = ["hatchling"]
-build-backend = "hatchling.build"
+requires = ["uv_build>=0.7.19,<=0.9.2"]
+build-backend = "uv_build"
 [tool.hatch.env]
 requires = ["pip"]
@@ -9,7 +9,7 @@ installer = "uv"
 [project]
 name = "sutro"
-version = "0.1.35"
+version = "0.1.37"
 description = "Sutro Python SDK"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -18,7 +18,7 @@ dependencies = [
     "numpy>=2.1.1,<3.0.0",
     "requests>=2.32.3,<3.0.0",
     "pandas>=2.2.3,<3.0.0",
-    "polars<=1.8.2", # upgrade to 1.34.0 when https://linear.app/skysight-cloud/issue/SO-374/sdk-fix-json-unpacking-for-polars=1330 lands
+    "polars>=1.33.0,<=1.34.0",
     "click>=8.1.7,<9.0.0",
     "colorama>=0.4.4,<1.0.0",
     "yaspin>=3.2.0,<4.0.0",
@@ -39,16 +39,14 @@ sutro = "sutro.cli:cli"
 "Homepage" = "https://sutro.sh"
 "Documentation" = "https://docs.sutro.sh"
-[tool.hatch.build.targets.wheel]
-packages = ["sutro"]
-[tool.hatch.build.targets.sdist]
-include = [
+[tool.uv.build-backend]
+module-root = "."
+source-include = [
     "sutro",
     "README.md",
     "LICENSE",
 ]
-exclude = [
+source-exclude = [
     "demo_data",
     "demo.py",
     ".gitignore",

{sutro-0.1.35 → sutro-0.1.37}/sutro/sdk.py RENAMED Viewed

@@ -14,7 +14,10 @@ import time
 from pydantic import BaseModel
 import pyarrow.parquet as pq
 import shutil
+import importlib.metadata
+JOB_NAME_CHAR_LIMIT = 45
+JOB_DESCRIPTION_CHAR_LIMIT = 512
 class JobStatus(str, Enum):
     """Job statuses that will be returned by the API & SDK"""
@@ -62,15 +65,20 @@ ModelOptions = Literal[
     "llama-3.3-70b",
     "llama-3.3-70b",
     "qwen-3-4b",
+    "qwen-3-14b",
     "qwen-3-32b",
+    "qwen-3-30b-a3b",
+    "qwen-3-235b-a22b",
     "qwen-3-4b-thinking",
+    "qwen-3-14b-thinking",
     "qwen-3-32b-thinking",
+    "qwen-3-235b-a22b-thinking",
+    "qwen-3-30b-a3b-thinking",
     "gemma-3-4b-it",
+    "gemma-3-12b-it",
     "gemma-3-27b-it",
-    "gpt-oss-120b",
     "gpt-oss-20b",
-    "qwen-3-235b-a22b-thinking",
-    "qwen-3-30b-a3b-thinking",
+    "gpt-oss-120b",
     "qwen-3-embedding-0.6b",
     "qwen-3-embedding-6b",
     "qwen-3-embedding-8b",
@@ -78,7 +86,7 @@ ModelOptions = Literal[
 def to_colored_text(
-    text: str, state: Optional[Literal["success", "fail"]] = None
+    text: str, state: Optional[Literal["success", "fail", "callout"]] = None
 ) -> str:
     """
     Apply color to text based on state.
@@ -96,6 +104,8 @@ def to_colored_text(
             return f"{Fore.GREEN}{text}{Style.RESET_ALL}"
         case "fail":
             return f"{Fore.RED}{text}{Style.RESET_ALL}"
+        case "callout":
+            return f"{Fore.MAGENTA}{text}{Style.RESET_ALL}"
         case _:
             # Default to blue for normal/processing states
             return f"{Fore.BLUE}{text}{Style.RESET_ALL}"
@@ -117,6 +127,34 @@ class Sutro:
     def __init__(self, api_key: str = None, base_url: str = "https://api.sutro.sh/"):
         self.api_key = api_key or self.check_for_api_key()
         self.base_url = base_url
+        self.check_version("sutro")
+    def check_version(self, package_name: str):
+        try:
+            # Local version
+            local_version = importlib.metadata.version(package_name)
+        except importlib.metadata.PackageNotFoundError:
+            print(f"{package_name} is not installed.")
+            return
+        try:
+            # Latest release from PyPI
+            resp = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=2)
+            resp.raise_for_status()
+            latest_version = resp.json()["info"]["version"]
+            if local_version != latest_version:
+                msg = (f"⚠️  You are using {package_name} {local_version}, "
+                    f"but the latest release is {latest_version}. "
+                    f"Run `[uv] pip install -U {package_name}` to upgrade.")
+                print(to_colored_text(
+                        msg,
+                        state="callout"
+                    )
+                )
+        except Exception as e:
+            # Fail silently or log, you don’t want this blocking usage
+            pass
     def check_for_api_key(self):
         """
@@ -159,6 +197,39 @@ class Sutro:
         """
         self.api_key = api_key
+    def do_dataframe_column_concatenation(self, data: Union[pd.DataFrame, pl.DataFrame], column: Union[str, List[str]]):
+        """
+        If the user has supplied a dataframe and a list of columns, this will intelligenly concatenate the columns into a single column, accepting separator strings.
+        """
+        try:
+            if isinstance(data, pd.DataFrame):
+                series_parts = []
+                for p in column:
+                    if p in data.columns:
+                        s = data[p].astype("string").fillna("")
+                    else:
+                        # Treat as a literal separator
+                        s = pd.Series([p] * len(data), index=data.index, dtype="string")
+                    series_parts.append(s)
+                out = series_parts[0]
+                for s in series_parts[1:]:
+                    out = out.str.cat(s, na_rep="")
+                return out.tolist()
+            elif isinstance(data, pl.DataFrame):
+                exprs = []
+                for p in column:
+                    if p in data.columns:
+                        exprs.append(pl.col(p).cast(pl.Utf8).fill_null(""))
+                    else:
+                        exprs.append(pl.lit(p))
+                result = data.select(pl.concat_str(exprs, separator="", ignore_nulls=False).alias("concat"))
+                return result["concat"].to_list()
+        except Exception as e:
+            raise ValueError(f"Error handling column concatentation: {e}")
     def handle_data_helper(
         self, data: Union[List, pd.DataFrame, pl.DataFrame, str], column: str = None
     ):
@@ -167,7 +238,10 @@ class Sutro:
         elif isinstance(data, (pd.DataFrame, pl.DataFrame)):
             if column is None:
                 raise ValueError("Column name must be specified for DataFrame input")
-            input_data = data[column].to_list()
+            if isinstance(column, list):
+                input_data = self.do_dataframe_column_concatenation(data, column)
+            elif isinstance(column, str):
+                input_data = data[column].to_list()
         elif isinstance(data, str):
             if data.startswith("dataset-"):
                 input_data = data + ":" + column
@@ -212,7 +286,7 @@ class Sutro:
         self,
         data: Union[List, pd.DataFrame, pl.DataFrame, str],
         model: ModelOptions,
-        column: str,
+        column: Union[str, List[str]],
         output_column: str,
         job_priority: int,
         json_schema: Dict[str, Any],
@@ -222,7 +296,15 @@ class Sutro:
         stay_attached: Optional[bool],
         random_seed_per_input: bool,
         truncate_rows: bool,
+        name: str,
+        description: str,
     ):
+        # Validate name and description lengths
+        if name is not None and len(name) > JOB_NAME_CHAR_LIMIT:
+            raise ValueError(f"Job name cannot exceed {JOB_NAME_CHAR_LIMIT} characters.")
+        if description is not None and len(description) > JOB_DESCRIPTION_CHAR_LIMIT:
+            raise ValueError(f"Job description cannot exceed {JOB_DESCRIPTION_CHAR_LIMIT} characters.")
         input_data = self.handle_data_helper(data, column)
         endpoint = f"{self.base_url}/batch-inference"
         headers = {
@@ -239,6 +321,8 @@ class Sutro:
             "sampling_params": sampling_params,
             "random_seed_per_input": random_seed_per_input,
             "truncate_rows": truncate_rows,
+            "name": name,
+            "description": description,
         }
         # There are two gotchas with yaspin:
@@ -284,9 +368,10 @@ class Sutro:
                         )
                         return job_id
                     else:
+                        name_text = f" and name {name}" if name is not None else ""
                         spinner.write(
                             to_colored_text(
-                                f"🛠 Priority {job_priority} Job created with ID: {job_id}.",
+                                f"🛠 Priority {job_priority} Job created with ID: {job_id}{name_text}.",
                                 state="success",
                             )
                         )
@@ -435,7 +520,21 @@ class Sutro:
                 results = job_results_response.json()["results"]["outputs"]
-                spinner.write(
+                if isinstance(data, (pd.DataFrame, pl.DataFrame)):
+                    if isinstance(data, pd.DataFrame):
+                        data[output_column] = results
+                    elif isinstance(data, pl.DataFrame):
+                        data = data.with_columns(pl.Series(output_column, results))
+                    print(data)
+                    spinner.write(
+                        to_colored_text(
+                            f"✔ Displaying result preview. You can join the results on the original dataframe with `so.get_job_results('{job_id}', with_original_df=<original_df>)`",
+                            state="success",
+                        )
+                    )
+                else:
+                    print(results)
+                    spinner.write(
                     to_colored_text(
                         f"✔ Job results received. You can re-obtain the results with `so.get_job_results('{job_id}')`",
                         state="success",
@@ -443,14 +542,7 @@ class Sutro:
                 )
                 spinner.stop()
-                if isinstance(data, (pd.DataFrame, pl.DataFrame)):
-                    if isinstance(data, pd.DataFrame):
-                        data[output_column] = results
-                    elif isinstance(data, pl.DataFrame):
-                        data = data.with_columns(pl.Series(output_column, results))
-                    return data
-                return results
+                return job_id
             return None
         return None
@@ -458,7 +550,9 @@ class Sutro:
         self,
         data: Union[List, pd.DataFrame, pl.DataFrame, str],
         model: Union[ModelOptions, List[ModelOptions]] = "gemma-3-12b-it",
-        column: str = None,
+        name: Union[str, List[str]] = None,
+        description: Union[str, List[str]] = None,
+        column: Union[str, List[str]] = None,
         output_column: str = "inference_result",
         job_priority: int = 0,
         output_schema: Union[Dict[str, Any], BaseModel] = None,
@@ -467,7 +561,7 @@ class Sutro:
         dry_run: bool = False,
         stay_attached: Optional[bool] = None,
         random_seed_per_input: bool = False,
-        truncate_rows: bool = False,
+        truncate_rows: bool = True,
     ):
         """
         Run inference on the provided data.
@@ -478,7 +572,9 @@ class Sutro:
         Args:
             data (Union[List, pd.DataFrame, pl.DataFrame, str]): The data to run inference on.
             model (Union[ModelOptions, List[ModelOptions]], optional): The model(s) to use for inference. Defaults to "llama-3.1-8b". You can pass a single model or a list of models. In the case of a list, the inference will be run in parallel for each model and stay_attached will be set to False.
-            column (str, optional): The column name to use for inference. Required if data is a DataFrame, file path, or dataset.
+            name (Union[str, List[str]], optional): A job name for experiment/metadata tracking purposes. If using a list of models, you must pass a list of names with length equal to the number of models, or None. Defaults to None.
+            description (Union[str, List[str]], optional): A job description for experiment/metadata tracking purposes. If using a list of models, you must pass a list of descriptions with length equal to the number of models, or None. Defaults to None.
+            column (Union[str, List[str]], optional): The column name to use for inference. Required if data is a DataFrame, file path, or dataset. If a list is supplied, it will concatenate the columns of the list into a single column, accepting separator strings.
             output_column (str, optional): The column name to store the inference results in if the input is a DataFrame. Defaults to "inference_result".
             job_priority (int, optional): The priority of the job. Defaults to 0.
             output_schema (Union[Dict[str, Any], BaseModel], optional): A structured schema for the output.
@@ -488,10 +584,10 @@ class Sutro:
             dry_run (bool, optional): If True, the method will return cost estimates instead of running inference. Defaults to False.
             stay_attached (bool, optional): If True, the method will stay attached to the job until it is complete. Defaults to True for prototyping jobs, False otherwise.
             random_seed_per_input (bool, optional): If True, the method will use a different random seed for each input. Defaults to False.
-            truncate_rows (bool, optional): If True, any rows that have a token count exceeding the context window length of the selected model will be truncated to the max length that will fit within the context window. Defaults to False.
+            truncate_rows (bool, optional): If True, any rows that have a token count exceeding the context window length of the selected model will be truncated to the max length that will fit within the context window. Defaults to True.
         Returns:
-            Union[List, pd.DataFrame, pl.DataFrame, str]: The results of the inference.
+            str: The ID of the inference job.
         """
         if isinstance(model, list) == False:
@@ -503,6 +599,34 @@ class Sutro:
             model_list = model
             stay_attached = False
+        if isinstance(model_list, list):
+            if isinstance(name, list):
+                if len(name) != len(model_list):
+                    raise ValueError("Name list must be the same length as the model list.")
+                name_list = name
+            elif isinstance(name, str):
+                raise ValueError("Name must be a list if using a list of models.")
+            elif name is None:
+                name_list = [None] * len(model_list)
+        else:
+            if isinstance(name, list):
+                raise ValueError("Name must be a string or None if using a single model.")
+            name_list = [name]
+        if isinstance(model_list, list):
+            if isinstance(description, list):
+                if len(description) != len(model_list):
+                    raise ValueError("Descriptions list must be the same length as the model list.")
+                description_list = description
+            elif isinstance(description, str):
+                raise ValueError("Description must be a list if using a list of models.")
+            elif description is None:
+                description_list = [None] * len(model_list)
+        else:
+            if isinstance(name, list):
+                raise ValueError("Description must be a string or None if using a single model.")
+            description_list = [description]
         # Convert BaseModel to dict if needed
         if output_schema is not None:
             if hasattr(
@@ -517,12 +641,12 @@ class Sutro:
                 )
         else:
             json_schema = None
         results = []
-        for model in model_list:
+        for i in range(len(model_list)):
             res = self._run_one_batch_inference(
                 data,
-                model,
+                model_list[i],
                 column,
                 output_column,
                 job_priority,
@@ -533,6 +657,8 @@ class Sutro:
                 stay_attached,
                 random_seed_per_input,
                 truncate_rows,
+                name_list[i],
+                description_list[i],
             )
             results.append(res)
@@ -967,9 +1093,9 @@ class Sutro:
                 first_row = json.loads(
                     results_df.head(1)[output_column][0]
                 )  # checks if the first row can be json decoded
+                results_df = results_df.map_columns(output_column, lambda s: s.str.json_decode())
                 results_df = results_df.with_columns(
                     pl.col(output_column)
-                    .str.json_decode()
                     .alias("output_column_json_decoded")
                 )
                 json_decoded_fields = first_row.keys()
@@ -979,7 +1105,15 @@ class Sutro:
                         .struct.field(field)
                         .alias(field)
                     )
-                # drop the output_column and the json decoded column
+                if sorted(list(set(json_decoded_fields))) == ['content', 'reasoning_content']: # if it's a reasoning model, we need to unpack the content field
+                    content_keys = results_df.head(1)['content'][0].keys()
+                    for key in content_keys:
+                        results_df = results_df.with_columns(
+                            pl.col("content")
+                            .struct.field(key)
+                            .alias(key)
+                        )
+                    results_df = results_df.drop("content")
                 results_df = results_df.drop(
                     [output_column, "output_column_json_decoded"]
                 )
@@ -1364,7 +1498,7 @@ class Sutro:
             timeout (Optional[int]): The max time in seconds the function should wait for job results for. Default is 7200 (2 hours).
         Returns:
-            list: The results of the job.
+            pl.DataFrame: The results of the job in a polars DataFrame.
         """
         POLL_INTERVAL = 5

sutro-0.1.35/.gitignore DELETED Viewed

@@ -1,4 +0,0 @@
-.DS_Store
-dist
-**/__pycache__/
-demo_data

{sutro-0.1.35 → sutro-0.1.37}/LICENSE RENAMED Viewed

File without changes

{sutro-0.1.35 → sutro-0.1.37}/README.md RENAMED Viewed

File without changes

{sutro-0.1.35 → sutro-0.1.37}/sutro/__init__.py RENAMED Viewed

File without changes

{sutro-0.1.35 → sutro-0.1.37}/sutro/cli.py RENAMED Viewed

File without changes

sutro 0.1.35__tar.gz → 0.1.37__tar.gz

sutro 0.1.35.tar.gz → 0.1.37.tar.gz