sutro 0.1.36__tar.gz → 0.1.37__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,24 +1,23 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sutro
3
- Version: 0.1.36
3
+ Version: 0.1.37
4
4
  Summary: Sutro Python SDK
5
- Project-URL: Homepage, https://sutro.sh
6
- Project-URL: Documentation, https://docs.sutro.sh
7
5
  License-Expression: Apache-2.0
8
- License-File: LICENSE
6
+ Requires-Dist: numpy>=2.1.1,<3.0.0
7
+ Requires-Dist: requests>=2.32.3,<3.0.0
8
+ Requires-Dist: pandas>=2.2.3,<3.0.0
9
+ Requires-Dist: polars>=1.33.0,<=1.34.0
10
+ Requires-Dist: click>=8.1.7,<9.0.0
11
+ Requires-Dist: colorama>=0.4.4,<1.0.0
12
+ Requires-Dist: yaspin>=3.2.0,<4.0.0
13
+ Requires-Dist: tqdm>=4.67.1,<5.0.0
14
+ Requires-Dist: pydantic>=2.11.4,<3.0.0
15
+ Requires-Dist: pyarrow>=21.0.0,<22.0.0
16
+ Requires-Dist: ruff==0.13.1 ; extra == 'dev'
9
17
  Requires-Python: >=3.10
10
- Requires-Dist: click<9.0.0,>=8.1.7
11
- Requires-Dist: colorama<1.0.0,>=0.4.4
12
- Requires-Dist: numpy<3.0.0,>=2.1.1
13
- Requires-Dist: pandas<3.0.0,>=2.2.3
14
- Requires-Dist: polars<=1.8.2
15
- Requires-Dist: pyarrow<22.0.0,>=21.0.0
16
- Requires-Dist: pydantic<3.0.0,>=2.11.4
17
- Requires-Dist: requests<3.0.0,>=2.32.3
18
- Requires-Dist: tqdm<5.0.0,>=4.67.1
19
- Requires-Dist: yaspin<4.0.0,>=3.2.0
18
+ Project-URL: Documentation, https://docs.sutro.sh
19
+ Project-URL: Homepage, https://sutro.sh
20
20
  Provides-Extra: dev
21
- Requires-Dist: ruff==0.13.1; extra == 'dev'
22
21
  Description-Content-Type: text/markdown
23
22
 
24
23
  ![Sutro Logo](./assets/sutro-logo-dark.png)
@@ -1,6 +1,6 @@
1
1
  [build-system]
2
- requires = ["hatchling"]
3
- build-backend = "hatchling.build"
2
+ requires = ["uv_build>=0.7.19,<=0.9.2"]
3
+ build-backend = "uv_build"
4
4
 
5
5
  [tool.hatch.env]
6
6
  requires = ["pip"]
@@ -9,7 +9,7 @@ installer = "uv"
9
9
 
10
10
  [project]
11
11
  name = "sutro"
12
- version = "0.1.36"
12
+ version = "0.1.37"
13
13
  description = "Sutro Python SDK"
14
14
  readme = "README.md"
15
15
  requires-python = ">=3.10"
@@ -18,7 +18,7 @@ dependencies = [
18
18
  "numpy>=2.1.1,<3.0.0",
19
19
  "requests>=2.32.3,<3.0.0",
20
20
  "pandas>=2.2.3,<3.0.0",
21
- "polars<=1.8.2", # upgrade to 1.34.0 when https://linear.app/skysight-cloud/issue/SO-374/sdk-fix-json-unpacking-for-polars=1330 lands
21
+ "polars>=1.33.0,<=1.34.0",
22
22
  "click>=8.1.7,<9.0.0",
23
23
  "colorama>=0.4.4,<1.0.0",
24
24
  "yaspin>=3.2.0,<4.0.0",
@@ -39,16 +39,14 @@ sutro = "sutro.cli:cli"
39
39
  "Homepage" = "https://sutro.sh"
40
40
  "Documentation" = "https://docs.sutro.sh"
41
41
 
42
- [tool.hatch.build.targets.wheel]
43
- packages = ["sutro"]
44
-
45
- [tool.hatch.build.targets.sdist]
46
- include = [
42
+ [tool.uv.build-backend]
43
+ module-root = "."
44
+ source-include = [
47
45
  "sutro",
48
46
  "README.md",
49
47
  "LICENSE",
50
48
  ]
51
- exclude = [
49
+ source-exclude = [
52
50
  "demo_data",
53
51
  "demo.py",
54
52
  ".gitignore",
@@ -14,6 +14,7 @@ import time
14
14
  from pydantic import BaseModel
15
15
  import pyarrow.parquet as pq
16
16
  import shutil
17
+ import importlib.metadata
17
18
 
18
19
  JOB_NAME_CHAR_LIMIT = 45
19
20
  JOB_DESCRIPTION_CHAR_LIMIT = 512
@@ -85,7 +86,7 @@ ModelOptions = Literal[
85
86
 
86
87
 
87
88
  def to_colored_text(
88
- text: str, state: Optional[Literal["success", "fail"]] = None
89
+ text: str, state: Optional[Literal["success", "fail", "callout"]] = None
89
90
  ) -> str:
90
91
  """
91
92
  Apply color to text based on state.
@@ -103,6 +104,8 @@ def to_colored_text(
103
104
  return f"{Fore.GREEN}{text}{Style.RESET_ALL}"
104
105
  case "fail":
105
106
  return f"{Fore.RED}{text}{Style.RESET_ALL}"
107
+ case "callout":
108
+ return f"{Fore.MAGENTA}{text}{Style.RESET_ALL}"
106
109
  case _:
107
110
  # Default to blue for normal/processing states
108
111
  return f"{Fore.BLUE}{text}{Style.RESET_ALL}"
@@ -124,6 +127,34 @@ class Sutro:
124
127
  def __init__(self, api_key: str = None, base_url: str = "https://api.sutro.sh/"):
125
128
  self.api_key = api_key or self.check_for_api_key()
126
129
  self.base_url = base_url
130
+ self.check_version("sutro")
131
+
132
+ def check_version(self, package_name: str):
133
+ try:
134
+ # Local version
135
+ local_version = importlib.metadata.version(package_name)
136
+ except importlib.metadata.PackageNotFoundError:
137
+ print(f"{package_name} is not installed.")
138
+ return
139
+
140
+ try:
141
+ # Latest release from PyPI
142
+ resp = requests.get(f"https://pypi.org/pypi/{package_name}/json", timeout=2)
143
+ resp.raise_for_status()
144
+ latest_version = resp.json()["info"]["version"]
145
+
146
+ if local_version != latest_version:
147
+ msg = (f"⚠️ You are using {package_name} {local_version}, "
148
+ f"but the latest release is {latest_version}. "
149
+ f"Run `[uv] pip install -U {package_name}` to upgrade.")
150
+ print(to_colored_text(
151
+ msg,
152
+ state="callout"
153
+ )
154
+ )
155
+ except Exception as e:
156
+ # Fail silently or log, you don’t want this blocking usage
157
+ pass
127
158
 
128
159
  def check_for_api_key(self):
129
160
  """
@@ -489,7 +520,21 @@ class Sutro:
489
520
 
490
521
  results = job_results_response.json()["results"]["outputs"]
491
522
 
492
- spinner.write(
523
+ if isinstance(data, (pd.DataFrame, pl.DataFrame)):
524
+ if isinstance(data, pd.DataFrame):
525
+ data[output_column] = results
526
+ elif isinstance(data, pl.DataFrame):
527
+ data = data.with_columns(pl.Series(output_column, results))
528
+ print(data)
529
+ spinner.write(
530
+ to_colored_text(
531
+ f"✔ Displaying result preview. You can join the results on the original dataframe with `so.get_job_results('{job_id}', with_original_df=<original_df>)`",
532
+ state="success",
533
+ )
534
+ )
535
+ else:
536
+ print(results)
537
+ spinner.write(
493
538
  to_colored_text(
494
539
  f"✔ Job results received. You can re-obtain the results with `so.get_job_results('{job_id}')`",
495
540
  state="success",
@@ -497,14 +542,7 @@ class Sutro:
497
542
  )
498
543
  spinner.stop()
499
544
 
500
- if isinstance(data, (pd.DataFrame, pl.DataFrame)):
501
- if isinstance(data, pd.DataFrame):
502
- data[output_column] = results
503
- elif isinstance(data, pl.DataFrame):
504
- data = data.with_columns(pl.Series(output_column, results))
505
- return data
506
-
507
- return results
545
+ return job_id
508
546
  return None
509
547
  return None
510
548
 
@@ -523,7 +561,7 @@ class Sutro:
523
561
  dry_run: bool = False,
524
562
  stay_attached: Optional[bool] = None,
525
563
  random_seed_per_input: bool = False,
526
- truncate_rows: bool = False,
564
+ truncate_rows: bool = True,
527
565
  ):
528
566
  """
529
567
  Run inference on the provided data.
@@ -546,10 +584,10 @@ class Sutro:
546
584
  dry_run (bool, optional): If True, the method will return cost estimates instead of running inference. Defaults to False.
547
585
  stay_attached (bool, optional): If True, the method will stay attached to the job until it is complete. Defaults to True for prototyping jobs, False otherwise.
548
586
  random_seed_per_input (bool, optional): If True, the method will use a different random seed for each input. Defaults to False.
549
- truncate_rows (bool, optional): If True, any rows that have a token count exceeding the context window length of the selected model will be truncated to the max length that will fit within the context window. Defaults to False.
587
+ truncate_rows (bool, optional): If True, any rows that have a token count exceeding the context window length of the selected model will be truncated to the max length that will fit within the context window. Defaults to True.
550
588
 
551
589
  Returns:
552
- Union[List, pd.DataFrame, pl.DataFrame, str]: The results of the inference.
590
+ str: The ID of the inference job.
553
591
 
554
592
  """
555
593
  if isinstance(model, list) == False:
@@ -568,6 +606,8 @@ class Sutro:
568
606
  name_list = name
569
607
  elif isinstance(name, str):
570
608
  raise ValueError("Name must be a list if using a list of models.")
609
+ elif name is None:
610
+ name_list = [None] * len(model_list)
571
611
  else:
572
612
  if isinstance(name, list):
573
613
  raise ValueError("Name must be a string or None if using a single model.")
@@ -580,6 +620,8 @@ class Sutro:
580
620
  description_list = description
581
621
  elif isinstance(description, str):
582
622
  raise ValueError("Description must be a list if using a list of models.")
623
+ elif description is None:
624
+ description_list = [None] * len(model_list)
583
625
  else:
584
626
  if isinstance(name, list):
585
627
  raise ValueError("Description must be a string or None if using a single model.")
@@ -1051,9 +1093,9 @@ class Sutro:
1051
1093
  first_row = json.loads(
1052
1094
  results_df.head(1)[output_column][0]
1053
1095
  ) # checks if the first row can be json decoded
1096
+ results_df = results_df.map_columns(output_column, lambda s: s.str.json_decode())
1054
1097
  results_df = results_df.with_columns(
1055
1098
  pl.col(output_column)
1056
- .str.json_decode()
1057
1099
  .alias("output_column_json_decoded")
1058
1100
  )
1059
1101
  json_decoded_fields = first_row.keys()
@@ -1063,7 +1105,15 @@ class Sutro:
1063
1105
  .struct.field(field)
1064
1106
  .alias(field)
1065
1107
  )
1066
- # drop the output_column and the json decoded column
1108
+ if sorted(list(set(json_decoded_fields))) == ['content', 'reasoning_content']: # if it's a reasoning model, we need to unpack the content field
1109
+ content_keys = results_df.head(1)['content'][0].keys()
1110
+ for key in content_keys:
1111
+ results_df = results_df.with_columns(
1112
+ pl.col("content")
1113
+ .struct.field(key)
1114
+ .alias(key)
1115
+ )
1116
+ results_df = results_df.drop("content")
1067
1117
  results_df = results_df.drop(
1068
1118
  [output_column, "output_column_json_decoded"]
1069
1119
  )
@@ -1448,7 +1498,7 @@ class Sutro:
1448
1498
  timeout (Optional[int]): The max time in seconds the function should wait for job results for. Default is 7200 (2 hours).
1449
1499
 
1450
1500
  Returns:
1451
- list: The results of the job.
1501
+ pl.DataFrame: The results of the job in a polars DataFrame.
1452
1502
  """
1453
1503
  POLL_INTERVAL = 5
1454
1504
 
sutro-0.1.36/.gitignore DELETED
@@ -1,4 +0,0 @@
1
- .DS_Store
2
- dist
3
- **/__pycache__/
4
- demo_data
File without changes
File without changes
File without changes
File without changes