sutro 0.1.37__py3-none-any.whl → 0.1.38__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sutro might be problematic. Click here for more details.

@@ -0,0 +1,117 @@
1
+ import json
2
+ from typing import Union, List
3
+ import polars as pl
4
+ import pandas as pd
5
+ from pydantic import BaseModel
6
+
7
+ from ..common import ModelOptions
8
+ from ..interfaces import BaseSutroClient
9
+
10
+
11
class ClassificationTemplates(BaseSutroClient):
    """Classification template built on top of the Sutro client interface."""

    def classify(
        self,
        data: Union[List, pd.DataFrame, pl.DataFrame, str],
        classes: Union[dict[str, str], list[str]],
        model: ModelOptions = "gemma-3-12b-it",
        job_priority: int = 0,
        name: Union[str, List[str], None] = None,
        description: Union[str, List[str], None] = None,
        output_column: str = "inference_result",
        column: Union[str, List[str], None] = None,
        truncate_rows: bool = True,
        include_scratchpad: bool = False,
    ):
        """
        A simple template style function to perform classification on the provided data with Sutro. The intention is that the implemented code should be very easy to extend further, while showing a basic structure for large-scale classification with Sutro.

        It uses structured outputs with a scratchpad field, enabling the model to reason step-by-step before providing the final classification.
        The method supports various input formats including lists, DataFrames (Polars or Pandas), file paths, and datasets.
        The method will wait for the classification job to complete before returning the results.

        Args:
            data (Union[List, pd.DataFrame, pl.DataFrame, str]): The data to classify. Each row should contain some text to classify that fits into one of the passed in labels.
            classes (Union[dict[str, str], list[str]]): The classification classes. Must not be empty. Can be either:
                - A list of class names, ie ["Positive", "Negative", "Neutral"]
                - A dict mapping class labels to descriptions, ie {"Positive": "Expresses satisfaction...", ...}
                Providing descriptions can improve classification accuracy, especially for ambiguous or domain-specific categories.
            model (ModelOptions, optional): The LLM to use. Defaults to "gemma-3-12b-it"; a model chosen for its balance of performance and efficiency, that also retains competency across a broad number of different domains.
            job_priority (int, optional): The priority of the job. Defaults to 0.
            name (Union[str, List[str]], optional): A job name for experiment/metadata tracking purposes. Defaults to None.
            description (Union[str, List[str]], optional): A job description for experiment/metadata tracking purposes. Defaults to None.
            output_column (str, optional): The column name to store the classification results in if the input is a DataFrame. Defaults to "inference_result".
            column (Union[str, List[str]], optional): The column name to use for classification. Required if data is a DataFrame, file path, or dataset. If a list is supplied, it will concatenate the columns of the list into a single column, accepting separator strings.
            truncate_rows (bool, optional): If True, any rows that have a token count exceeding the context window length of the selected model will be truncated to the max length that will fit within the context window. Defaults to True.
            include_scratchpad (bool, optional): If True, includes the model's thinking scratchpad in the output. If False, only returns the final classification. Defaults to False.

        Returns:
            The completed classification results for the provided data. If include_scratchpad is True, the output column keeps the JSON object holding both the scratchpad and classification fields. If False, the output column is reduced to the classification string only.

        Raises:
            ValueError: If ``classes`` is empty, since the prompt would then offer the model nothing to choose from.

        """
        # Reject an empty class set up front instead of paying for a job whose
        # prompt lists no classes at all.
        if not classes:
            raise ValueError("classes must contain at least one class")

        # A dict carries "label: description" pairs; a list carries labels only.
        if isinstance(classes, dict):
            class_lines = [f"- {label}: {desc}" for label, desc in classes.items()]
        else:
            class_lines = [f"- {label}" for label in classes]
        formatted_classes = "\n".join(class_lines)

        # NOTE: the prompt body is flush-left on purpose so that no source
        # indentation leaks into the text sent to the model.
        system_prompt = f"""You are an expert classifier. Your task is to accurately categorize the input into one of the provided classes.

## Classes

{formatted_classes}

## Instructions

1. **Analyze the input carefully**: Read and understand the full context - identify key elements, themes, and characteristics

2. **Consider each class**: For each possible class, evaluate how similar the input is to its typical characteristics

3. **Provide your reasoning in the scratchpad**: Think through which class fits best and why

4. **Provide output**: Give your final classification

If needed, use the scratchpad field to work through steps 1-3, then provide your final answer in the classification field.

## Guidelines

- Select exactly ONE class, even if multiple seem applicable (choose the best match)
- If the input is ambiguous, choose the closest fit and explain your reasoning
- Base your decision on the actual content, not assumptions or implications
- Similar inputs should receive the same classification

Respond using the structured format with scratchpad and classification fields."""

        class ClassificationOutput(BaseModel):
            # Since we're using structured outputs, we want to give the model some
            # space to reason and think as needed
            scratchpad: str
            classification: str

        # stay_attached=False: submit without attaching, then block explicitly
        # on completion below.
        job_id = self.infer(
            data,
            model,
            name,
            description,
            system_prompt=system_prompt,
            output_schema=ClassificationOutput,
            column=column,
            output_column=output_column,
            job_priority=job_priority,
            truncate_rows=truncate_rows,
            stay_attached=False,
        )

        results = self.await_job_completion(job_id)

        # Filter out scratchpad if not wanted: each cell is parsed as JSON and
        # replaced by its "classification" value.
        # NOTE(review): assumes `results` is a Polars DataFrame whose output
        # column holds JSON strings - confirm against await_job_completion.
        if not include_scratchpad:
            results = results.with_columns(
                pl.col(output_column)
                .map_elements(
                    lambda x: json.loads(x)["classification"], return_dtype=pl.Utf8
                )
                .alias(output_column)
            )

        return results
@@ -0,0 +1,53 @@
1
+ from typing import Union, List
2
+ import polars as pl
3
+ import pandas as pd
4
+ from ..common import EmbeddingModelOptions
5
+ from ..interfaces import BaseSutroClient
6
+
7
+
8
class EmbeddingTemplates(BaseSutroClient):
    """Embedding template built on top of the Sutro client interface."""

    def embed(
        self,
        data: Union[List, pd.DataFrame, pl.DataFrame, str],
        model: EmbeddingModelOptions = "qwen-3-embedding-0.6b",
        job_priority: int = 0,
        name: Union[str, List[str], None] = None,
        description: Union[str, List[str], None] = None,
        output_column: str = "inference_result",
        column: Union[str, List[str], None] = None,
        truncate_rows: bool = True,
    ):
        """
        A simple template style function to generate embeddings for the provided data, with Sutro. The intention is that the implemented code should be very easy to extend further, while showing a basic structure for large scale embedding generation with Sutro.

        This method allows you to generate vector embeddings for the provided data using Sutro.
        It supports various options for inputting data, such as lists, DataFrames (Polars or Pandas), file paths and datasets.
        The method will wait for the embedding job to complete before returning the results.

        Args:
            data (Union[List, pd.DataFrame, pl.DataFrame, str]): The data to generate embeddings for.
            model (EmbeddingModelOptions, optional): The embedding model to use. Defaults to "qwen-3-embedding-0.6b"; a model we chose as it's small & fast, yet performs well on a variety of tasks.
            job_priority (int, optional): The priority of the job. Defaults to 0.
            name (Union[str, List[str]], optional): A job name for experiment/metadata tracking purposes. Defaults to None.
            description (Union[str, List[str]], optional): A job description for experiment/metadata tracking purposes. Defaults to None.
            output_column (str, optional): The column name to store the embedding results in if the input is a DataFrame. Defaults to "inference_result".
            column (Union[str, List[str]], optional): The column name to use for embedding generation. Required if data is a DataFrame, file path, or dataset. If a list is supplied, it will concatenate the columns of the list into a single column, accepting separator strings.
            truncate_rows (bool, optional): If True, any rows that have a token count exceeding the context window length of the selected model will be truncated to the max length that will fit within the context window. Defaults to True.

        Returns:
            The completed embedding results for the provided data.

        """
        # Options are passed by keyword so the call does not depend on the
        # parameter order of `infer`.
        # stay_attached=False: submit without attaching, then block explicitly
        # on completion below.
        job_id = self.infer(
            data,
            model,
            name,
            description,
            column=column,
            output_column=output_column,
            job_priority=job_priority,
            truncate_rows=truncate_rows,
            stay_attached=False,
        )

        return self.await_job_completion(job_id)
sutro/validation.py ADDED
@@ -0,0 +1,60 @@
1
+ import importlib.metadata
2
+ import json
3
+ import os
4
+
5
+ import requests
6
+
7
+ from sutro.common import to_colored_text
8
+
9
+
10
def _release_tuple(version):
    """Parse a plain dotted-integer version ("1.2.3") into a comparable tuple.

    Returns None when any component is not a plain ASCII integer (for example a
    pre-release such as "1.2.3rc1"), so the caller can fall back to a simpler
    check. Trailing zero components are dropped so "1.2" and "1.2.0" compare
    equal.
    """
    parts = []
    for piece in version.split("."):
        if not (piece.isascii() and piece.isdigit()):
            return None
        parts.append(int(piece))
    while parts and parts[-1] == 0:
        parts.pop()
    return tuple(parts)


def check_version(package_name: str):
    """Print an upgrade hint when the installed package is older than PyPI's latest.

    The check is best-effort: any failure while contacting PyPI, reading its
    response, or printing the hint is swallowed so that it never blocks usage.

    Args:
        package_name: Distribution name to look up locally and on PyPI.

    Returns:
        None. The function only prints.
    """
    try:
        # Local version
        local_version = importlib.metadata.version(package_name)
    except importlib.metadata.PackageNotFoundError:
        print(f"{package_name} is not installed.")
        return

    try:
        # Latest release from PyPI
        resp = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=2)
        resp.raise_for_status()
        latest_version = resp.json()["info"]["version"]

        # Compare numerically when both versions are plain dotted integers, so a
        # local build that is *newer* than PyPI is not told to "upgrade". Fall
        # back to the plain inequality check for anything else.
        local_key = _release_tuple(local_version)
        latest_key = _release_tuple(latest_version)
        if local_key is not None and latest_key is not None:
            outdated = local_key < latest_key
        else:
            outdated = local_version != latest_version

        if outdated:
            msg = (
                f"⚠️ You are using {package_name} {local_version}, "
                f"but the latest release is {latest_version}. "
                f"Run `[uv] pip install -U {package_name}` to upgrade."
            )
            print(to_colored_text(msg, state="callout"))
    except Exception:
        # Deliberately non-fatal: you don't want this blocking usage. The
        # failure is recorded at DEBUG level only, so it stays silent unless
        # the caller enables debug logging.
        import logging

        logging.getLogger(__name__).debug(
            "Skipping version check for %s", package_name, exc_info=True
        )
34
+
35
+
36
def check_for_api_key():
    """
    Check for an API key in the user's home directory.

    This method looks for a configuration file named 'config.json' in the
    '.sutro' directory within the user's home directory.
    If the file exists, it attempts to read the API key from it.

    A missing, unreadable, or malformed configuration file is treated the same
    as "no key configured" rather than raising.

    Returns:
        str or None: The API key if found in the configuration file, or None if not found.

    Note:
        The expected structure of the config.json file is:
        {
            "api_key": "your_api_key_here"
        }
    """
    config_dir = os.path.expanduser("~/.sutro")
    config_file = os.path.join(config_dir, "config.json")

    # Open directly instead of checking existence first: this avoids the race
    # between the check and the open.
    try:
        with open(config_file, "r", encoding="utf-8") as f:
            config = json.load(f)
    except (OSError, ValueError):
        # OSError: file missing or unreadable.
        # ValueError: invalid JSON or undecodable bytes.
        return None

    # Valid JSON that is not an object (e.g. a list) cannot hold "api_key".
    if not isinstance(config, dict):
        return None
    return config.get("api_key")
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sutro
3
- Version: 0.1.37
3
+ Version: 0.1.38
4
4
  Summary: Sutro Python SDK
5
5
  License-Expression: Apache-2.0
6
6
  Requires-Dist: numpy>=2.1.1,<3.0.0
@@ -0,0 +1,12 @@
1
+ sutro/__init__.py,sha256=yUiVwcZ8QamSqDdRHgzoANyTZ-x3cPzlt2Fs5OllR_w,402
2
+ sutro/cli.py,sha256=0NuqRInXA-_7TRw-T0OxP8otmUHUarMtY7kuLbWDous,13751
3
+ sutro/common.py,sha256=FuTYTzy82Ul56r9SVH0XMOqcBBspDAFvrtHM93ZbT_8,6945
4
+ sutro/interfaces.py,sha256=j8k4iEHjHu6HDEb9XqiuJrIRbXhLZi9WsiVmxC97R8s,2972
5
+ sutro/sdk.py,sha256=qhpmQNHZDaeGffPyCopmlc6YQuA1_hLFmuHeQXlNbSM,56107
6
+ sutro/templates/classification.py,sha256=iNFiyuR8bZc9Xe-NdimklpQUveqg3p_eJOlEAHaj7Is,6080
7
+ sutro/templates/embed.py,sha256=csvLA0hw5Qaro_yZvALRRp9_SbfWABFN0iQXrf8E8_I,2941
8
+ sutro/validation.py,sha256=FlFH5e5PAPIPpCrzU7mwfZKDDvrmkHt2yYsFm0Ahfmg,1849
9
+ sutro-0.1.38.dist-info/WHEEL,sha256=X16MKk8bp2DRsAuyteHJ-9qOjzmnY0x1aj0P1ftqqWA,78
10
+ sutro-0.1.38.dist-info/entry_points.txt,sha256=s-dtPZ0AScjvR8S_ykhzXxtVcUjrRlxVxyJymI81A3E,41
11
+ sutro-0.1.38.dist-info/METADATA,sha256=VeFTzSqKUiPkx8Ey-g1DkZ-_EkrLt6BwVVQa7-XC-sw,6259
12
+ sutro-0.1.38.dist-info/RECORD,,
@@ -1,7 +0,0 @@
1
- sutro/__init__.py,sha256=yUiVwcZ8QamSqDdRHgzoANyTZ-x3cPzlt2Fs5OllR_w,402
2
- sutro/cli.py,sha256=_FU8PwP4dMzXXg5ldxCXP3kaZvQtOKdA8Kzjc34xmQ0,13727
3
- sutro/sdk.py,sha256=dysuW6jwtuMjVTdDH1zCoycWLvjzBZa_Mi6dSM_zWpY,63799
4
- sutro-0.1.37.dist-info/WHEEL,sha256=X16MKk8bp2DRsAuyteHJ-9qOjzmnY0x1aj0P1ftqqWA,78
5
- sutro-0.1.37.dist-info/entry_points.txt,sha256=s-dtPZ0AScjvR8S_ykhzXxtVcUjrRlxVxyJymI81A3E,41
6
- sutro-0.1.37.dist-info/METADATA,sha256=pOSPs0yhCpKEhHZJIPIaL-wxSXYoUVBTLQNqN7WjO3E,6259
7
- sutro-0.1.37.dist-info/RECORD,,
File without changes