sutro 0.1.35 (tar.gz) → 0.1.36 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of sutro might be problematic; see the registry's advisory page for more details.

@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: sutro
3
- Version: 0.1.35
3
+ Version: 0.1.36
4
4
  Summary: Sutro Python SDK
5
5
  Project-URL: Homepage, https://sutro.sh
6
6
  Project-URL: Documentation, https://docs.sutro.sh
@@ -9,7 +9,7 @@ installer = "uv"
9
9
 
10
10
  [project]
11
11
  name = "sutro"
12
- version = "0.1.35"
12
+ version = "0.1.36"
13
13
  description = "Sutro Python SDK"
14
14
  readme = "README.md"
15
15
  requires-python = ">=3.10"
@@ -15,6 +15,8 @@ from pydantic import BaseModel
15
15
  import pyarrow.parquet as pq
16
16
  import shutil
17
17
 
18
+ JOB_NAME_CHAR_LIMIT = 45
19
+ JOB_DESCRIPTION_CHAR_LIMIT = 512
18
20
 
19
21
  class JobStatus(str, Enum):
20
22
  """Job statuses that will be returned by the API & SDK"""
@@ -62,15 +64,20 @@ ModelOptions = Literal[
62
64
  "llama-3.3-70b",
63
65
  "llama-3.3-70b",
64
66
  "qwen-3-4b",
67
+ "qwen-3-14b",
65
68
  "qwen-3-32b",
69
+ "qwen-3-30b-a3b",
70
+ "qwen-3-235b-a22b",
66
71
  "qwen-3-4b-thinking",
72
+ "qwen-3-14b-thinking",
67
73
  "qwen-3-32b-thinking",
74
+ "qwen-3-235b-a22b-thinking",
75
+ "qwen-3-30b-a3b-thinking",
68
76
  "gemma-3-4b-it",
77
+ "gemma-3-12b-it",
69
78
  "gemma-3-27b-it",
70
- "gpt-oss-120b",
71
79
  "gpt-oss-20b",
72
- "qwen-3-235b-a22b-thinking",
73
- "qwen-3-30b-a3b-thinking",
80
+ "gpt-oss-120b",
74
81
  "qwen-3-embedding-0.6b",
75
82
  "qwen-3-embedding-6b",
76
83
  "qwen-3-embedding-8b",
@@ -159,6 +166,39 @@ class Sutro:
159
166
  """
160
167
  self.api_key = api_key
161
168
 
169
+ def do_dataframe_column_concatenation(self, data: Union[pd.DataFrame, pl.DataFrame], column: Union[str, List[str]]):
170
+ """
171
+ If the user has supplied a dataframe and a list of columns, this will intelligenly concatenate the columns into a single column, accepting separator strings.
172
+ """
173
+ try:
174
+ if isinstance(data, pd.DataFrame):
175
+ series_parts = []
176
+ for p in column:
177
+ if p in data.columns:
178
+ s = data[p].astype("string").fillna("")
179
+ else:
180
+ # Treat as a literal separator
181
+ s = pd.Series([p] * len(data), index=data.index, dtype="string")
182
+ series_parts.append(s)
183
+
184
+ out = series_parts[0]
185
+ for s in series_parts[1:]:
186
+ out = out.str.cat(s, na_rep="")
187
+
188
+ return out.tolist()
189
+ elif isinstance(data, pl.DataFrame):
190
+ exprs = []
191
+ for p in column:
192
+ if p in data.columns:
193
+ exprs.append(pl.col(p).cast(pl.Utf8).fill_null(""))
194
+ else:
195
+ exprs.append(pl.lit(p))
196
+
197
+ result = data.select(pl.concat_str(exprs, separator="", ignore_nulls=False).alias("concat"))
198
+ return result["concat"].to_list()
199
+ except Exception as e:
200
+ raise ValueError(f"Error handling column concatentation: {e}")
201
+
162
202
  def handle_data_helper(
163
203
  self, data: Union[List, pd.DataFrame, pl.DataFrame, str], column: str = None
164
204
  ):
@@ -167,7 +207,10 @@ class Sutro:
167
207
  elif isinstance(data, (pd.DataFrame, pl.DataFrame)):
168
208
  if column is None:
169
209
  raise ValueError("Column name must be specified for DataFrame input")
170
- input_data = data[column].to_list()
210
+ if isinstance(column, list):
211
+ input_data = self.do_dataframe_column_concatenation(data, column)
212
+ elif isinstance(column, str):
213
+ input_data = data[column].to_list()
171
214
  elif isinstance(data, str):
172
215
  if data.startswith("dataset-"):
173
216
  input_data = data + ":" + column
@@ -212,7 +255,7 @@ class Sutro:
212
255
  self,
213
256
  data: Union[List, pd.DataFrame, pl.DataFrame, str],
214
257
  model: ModelOptions,
215
- column: str,
258
+ column: Union[str, List[str]],
216
259
  output_column: str,
217
260
  job_priority: int,
218
261
  json_schema: Dict[str, Any],
@@ -222,7 +265,15 @@ class Sutro:
222
265
  stay_attached: Optional[bool],
223
266
  random_seed_per_input: bool,
224
267
  truncate_rows: bool,
268
+ name: str,
269
+ description: str,
225
270
  ):
271
+ # Validate name and description lengths
272
+ if name is not None and len(name) > JOB_NAME_CHAR_LIMIT:
273
+ raise ValueError(f"Job name cannot exceed {JOB_NAME_CHAR_LIMIT} characters.")
274
+ if description is not None and len(description) > JOB_DESCRIPTION_CHAR_LIMIT:
275
+ raise ValueError(f"Job description cannot exceed {JOB_DESCRIPTION_CHAR_LIMIT} characters.")
276
+
226
277
  input_data = self.handle_data_helper(data, column)
227
278
  endpoint = f"{self.base_url}/batch-inference"
228
279
  headers = {
@@ -239,6 +290,8 @@ class Sutro:
239
290
  "sampling_params": sampling_params,
240
291
  "random_seed_per_input": random_seed_per_input,
241
292
  "truncate_rows": truncate_rows,
293
+ "name": name,
294
+ "description": description,
242
295
  }
243
296
 
244
297
  # There are two gotchas with yaspin:
@@ -284,9 +337,10 @@ class Sutro:
284
337
  )
285
338
  return job_id
286
339
  else:
340
+ name_text = f" and name {name}" if name is not None else ""
287
341
  spinner.write(
288
342
  to_colored_text(
289
- f"🛠 Priority {job_priority} Job created with ID: {job_id}.",
343
+ f"🛠 Priority {job_priority} Job created with ID: {job_id}{name_text}.",
290
344
  state="success",
291
345
  )
292
346
  )
@@ -458,7 +512,9 @@ class Sutro:
458
512
  self,
459
513
  data: Union[List, pd.DataFrame, pl.DataFrame, str],
460
514
  model: Union[ModelOptions, List[ModelOptions]] = "gemma-3-12b-it",
461
- column: str = None,
515
+ name: Union[str, List[str]] = None,
516
+ description: Union[str, List[str]] = None,
517
+ column: Union[str, List[str]] = None,
462
518
  output_column: str = "inference_result",
463
519
  job_priority: int = 0,
464
520
  output_schema: Union[Dict[str, Any], BaseModel] = None,
@@ -478,7 +534,9 @@ class Sutro:
478
534
  Args:
479
535
  data (Union[List, pd.DataFrame, pl.DataFrame, str]): The data to run inference on.
480
536
  model (Union[ModelOptions, List[ModelOptions]], optional): The model(s) to use for inference. Defaults to "llama-3.1-8b". You can pass a single model or a list of models. In the case of a list, the inference will be run in parallel for each model and stay_attached will be set to False.
481
- column (str, optional): The column name to use for inference. Required if data is a DataFrame, file path, or dataset.
537
+ name (Union[str, List[str]], optional): A job name for experiment/metadata tracking purposes. If using a list of models, you must pass a list of names with length equal to the number of models, or None. Defaults to None.
538
+ description (Union[str, List[str]], optional): A job description for experiment/metadata tracking purposes. If using a list of models, you must pass a list of descriptions with length equal to the number of models, or None. Defaults to None.
539
+ column (Union[str, List[str]], optional): The column name to use for inference. Required if data is a DataFrame, file path, or dataset. If a list is supplied, it will concatenate the columns of the list into a single column, accepting separator strings.
482
540
  output_column (str, optional): The column name to store the inference results in if the input is a DataFrame. Defaults to "inference_result".
483
541
  job_priority (int, optional): The priority of the job. Defaults to 0.
484
542
  output_schema (Union[Dict[str, Any], BaseModel], optional): A structured schema for the output.
@@ -503,6 +561,30 @@ class Sutro:
503
561
  model_list = model
504
562
  stay_attached = False
505
563
 
564
+ if isinstance(model_list, list):
565
+ if isinstance(name, list):
566
+ if len(name) != len(model_list):
567
+ raise ValueError("Name list must be the same length as the model list.")
568
+ name_list = name
569
+ elif isinstance(name, str):
570
+ raise ValueError("Name must be a list if using a list of models.")
571
+ else:
572
+ if isinstance(name, list):
573
+ raise ValueError("Name must be a string or None if using a single model.")
574
+ name_list = [name]
575
+
576
+ if isinstance(model_list, list):
577
+ if isinstance(description, list):
578
+ if len(description) != len(model_list):
579
+ raise ValueError("Descriptions list must be the same length as the model list.")
580
+ description_list = description
581
+ elif isinstance(description, str):
582
+ raise ValueError("Description must be a list if using a list of models.")
583
+ else:
584
+ if isinstance(name, list):
585
+ raise ValueError("Description must be a string or None if using a single model.")
586
+ description_list = [description]
587
+
506
588
  # Convert BaseModel to dict if needed
507
589
  if output_schema is not None:
508
590
  if hasattr(
@@ -517,12 +599,12 @@ class Sutro:
517
599
  )
518
600
  else:
519
601
  json_schema = None
520
-
602
+
521
603
  results = []
522
- for model in model_list:
604
+ for i in range(len(model_list)):
523
605
  res = self._run_one_batch_inference(
524
606
  data,
525
- model,
607
+ model_list[i],
526
608
  column,
527
609
  output_column,
528
610
  job_priority,
@@ -533,6 +615,8 @@ class Sutro:
533
615
  stay_attached,
534
616
  random_seed_per_input,
535
617
  truncate_rows,
618
+ name_list[i],
619
+ description_list[i],
536
620
  )
537
621
  results.append(res)
538
622
 
File without changes
File without changes
File without changes
File without changes
File without changes