sutro 0.1.13__tar.gz → 0.1.15__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of sutro might be problematic.

sutro-0.1.15/PKG-INFO ADDED
@@ -0,0 +1,23 @@
+Metadata-Version: 2.4
+Name: sutro
+Version: 0.1.15
+Summary: Sutro Python SDK
+Project-URL: Homepage, https://sutro.sh
+Project-URL: Documentation, https://docs.sutro.sh
+License-Expression: Apache-2.0
+License-File: LICENSE
+Requires-Python: >=3.10
+Requires-Dist: click==8.1.7
+Requires-Dist: colorama==0.4.4
+Requires-Dist: numpy==2.1.1
+Requires-Dist: pandas==2.2.3
+Requires-Dist: polars==1.8.2
+Requires-Dist: pydantic==2.11.4
+Requires-Dist: requests==2.32.3
+Requires-Dist: tqdm==4.67.1
+Requires-Dist: yaspin==3.1.0
+Description-Content-Type: text/markdown
+
+# sutro-client
+
+The official Python client for Sutro. See [docs.sutro.sh](https://docs.sutro.sh/) for more information.
sutro-0.1.15/README.md ADDED
@@ -0,0 +1,3 @@
+# sutro-client
+
+The official Python client for Sutro. See [docs.sutro.sh](https://docs.sutro.sh/) for more information.
{sutro-0.1.13 → sutro-0.1.15}/pyproject.toml RENAMED
@@ -9,7 +9,7 @@ installer = "uv"
 
 [project]
 name = "sutro"
-version = "0.1.13"
+version = "0.1.15"
 description = "Sutro Python SDK"
 readme = "README.md"
 requires-python = ">=3.10"
@@ -275,34 +275,34 @@ def cancel(job_id):
 
 
 @cli.group()
-def stages():
-    """Manage stages."""
+def datasets():
+    """Manage datasets."""
     pass
 
 
-@stages.command()
+@datasets.command()
 def create():
-    """Create a new stage."""
+    """Create a new dataset."""
     sdk = get_sdk()
-    stage_id = sdk.create_stage()
-    if not stage_id:
+    dataset_id = sdk.create_dataset()
+    if not dataset_id:
         return
     click.echo(
         Fore.GREEN
-        + f"Stage created successfully. Stage ID: {stage_id}"
+        + f"Dataset created successfully. Dataset ID: {dataset_id}"
         + Style.RESET_ALL
     )
 
 
-@stages.command()
+@datasets.command()
 def list():
-    """List all stages."""
+    """List all datasets."""
     sdk = get_sdk()
-    stages = sdk.list_stages()
-    if stages is None or len(stages) == 0:
-        click.echo(Fore.YELLOW + "No stages found." + Style.RESET_ALL)
+    datasets = sdk.list_datasets()
+    if datasets is None or len(datasets) == 0:
+        click.echo(Fore.YELLOW + "No datasets found." + Style.RESET_ALL)
         return
-    df = pl.DataFrame(stages)
+    df = pl.DataFrame(datasets)
 
     df = df.with_columns(
         pl.col("schema")
@@ -319,37 +319,37 @@ def list():
     print(df.select(pl.all()))
 
 
-@stages.command()
-@click.argument("stage_id")
-def files(stage_id):
-    """List all files in a stage."""
+@datasets.command()
+@click.argument("dataset_id")
+def files(dataset_id):
+    """List all files in a dataset."""
     sdk = get_sdk()
-    files = sdk.list_stage_files(stage_id)
+    files = sdk.list_dataset_files(dataset_id)
     if not files:
         return
 
-    print(Fore.YELLOW + "Files in stage " + stage_id + ":" + Style.RESET_ALL)
+    print(Fore.YELLOW + "Files in dataset " + dataset_id + ":" + Style.RESET_ALL)
     for file in files:
         print(f"\t{file}")
 
 
-@stages.command()
-@click.argument("stage_id", required=False)
+@datasets.command()
+@click.argument("dataset_id", required=False)
 @click.argument("file_path")
-def upload(file_path, stage_id):
-    """Upload files to a stage. You can provide a single file path or a directory path to upload all files in the directory."""
+def upload(file_path, dataset_id):
+    """Upload files to a dataset. You can provide a single file path or a directory path to upload all files in the directory."""
     sdk = get_sdk()
-    sdk.upload_to_stage(file_path, stage_id)
+    sdk.upload_to_dataset(file_path, dataset_id)
 
 
-@stages.command()
-@click.argument("stage_id")
+@datasets.command()
+@click.argument("dataset_id")
 @click.argument("file_name", required=False)
 @click.argument("output_path", required=False)
-def download(stage_id, file_name=None, output_path=None):
-    """Download a file/files from a stage. If no files are provided, all files in the stage will be downloaded. If no output path is provided, the file will be saved to the current working directory."""
+def download(dataset_id, file_name=None, output_path=None):
+    """Download a file/files from a dataset. If no files are provided, all files in the dataset will be downloaded. If no output path is provided, the file will be saved to the current working directory."""
     sdk = get_sdk()
-    files = sdk.download_from_stage(stage_id, [file_name], output_path)
+    files = sdk.download_from_dataset(dataset_id, [file_name], output_path)
     if not files:
         return
     for file in files:
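The stages → datasets rename runs through both the CLI group above and the SDK methods in the hunks below. For orientation, here is a minimal sketch of the new dataset workflow in 0.1.15; the import path, constructor argument, and file name are assumptions for illustration, not taken from this diff:

    from sutro import Sutro  # assumed import path

    sdk = Sutro(api_key="YOUR_API_KEY")                   # hypothetical constructor argument
    dataset_id = sdk.create_dataset()                     # formerly create_stage()
    sdk.upload_to_dataset(dataset_id, "reviews.parquet")  # formerly upload_to_stage(); path is made up
    print(sdk.list_dataset_files(dataset_id))             # formerly list_stage_files()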
@@ -30,6 +30,25 @@ def is_jupyter() -> bool:
 YASPIN_COLOR = None if is_jupyter() else "blue"
 SPINNER = Spinners.dots14
 
+# Models available for inference. Keep in sync with the backend configuration
+# so users get helpful autocompletion when selecting a model.
+ModelOptions = Literal[
+    "llama-3.2-3b",
+    "llama-3.1-8b",
+    "llama-3.3-70b-8k",
+    "llama-3.3-70b-64k",
+    "qwen-qwq-32b-8k",
+    "qwen-3-4b",
+    "qwen-3-32b",
+    "qwen-3-4b-thinking",
+    "qwen-3-32b-thinking",
+    "gemma-3-4b-it",
+    "gemma-3-27b-it-16k",
+    "gemma-3-27b-it-128k",
+    "multilingual-e5-large-instruct",
+    "gte-qwen2-7b-instruct",
+]
+
 
 def to_colored_text(
     text: str, state: Optional[Literal["success", "fail"]] = None
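Typing the model name as a Literal alias rather than a bare str lets editors autocomplete model names and lets static checkers reject typos before a job is ever submitted. A small sketch of what that buys (the misspelling is deliberate; nothing changes at runtime, since Literal is a static-only check):

    ok: ModelOptions = "llama-3.1-8b"   # accepted: a member of the Literal
    typo: ModelOptions = "lama-3.1-8b"  # mypy/pyright flag an invalid assignment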
@@ -114,7 +133,7 @@ class Sutro:
                raise ValueError("Column name must be specified for DataFrame input")
            input_data = data[column].to_list()
        elif isinstance(data, str):
-            if data.startswith("stage-"):
+            if data.startswith("dataset-"):
                input_data = data + ":" + column
            else:
                file_ext = os.path.splitext(data)[1].lower()
@@ -156,7 +175,7 @@ class Sutro:
     def infer(
         self,
         data: Union[List, pd.DataFrame, pl.DataFrame, str],
-        model: str = "llama-3.1-8b",
+        model: ModelOptions = "llama-3.1-8b",
         column: str = None,
         output_column: str = "inference_result",
         job_priority: int = 0,
@@ -172,12 +191,12 @@ class Sutro:
         Run inference on the provided data.
 
         This method allows you to run inference on the provided data using the Sutro API.
-        It supports various data types such as lists, pandas DataFrames, polars DataFrames, file paths and stages.
+        It supports various data types such as lists, pandas DataFrames, polars DataFrames, file paths and datasets.
 
         Args:
             data (Union[List, pd.DataFrame, pl.DataFrame, str]): The data to run inference on.
-            model (str, optional): The model to use for inference. Defaults to "llama-3.1-8b".
-            column (str, optional): The column name to use for inference. Required if data is a DataFrame, file path, or stage.
+            model (ModelOptions, optional): The model to use for inference. Defaults to "llama-3.1-8b".
+            column (str, optional): The column name to use for inference. Required if data is a DataFrame, file path, or dataset.
             output_column (str, optional): The column name to store the inference results in if the input is a DataFrame. Defaults to "inference_result".
             job_priority (int, optional): The priority of the job. Defaults to 0.
             output_schema (Union[Dict[str, Any], BaseModel], optional): A structured schema for the output.
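Taken together with the "dataset-" prefix handling earlier in the diff, infer can now be pointed at a dataset by ID. A hedged sketch of both call shapes, continuing the sdk object from the sketch above (prompts and dataset ID are made up):

    # List input: each element is run through the model.
    results = sdk.infer(["Summarize: ...", "Classify: ..."], model="qwen-3-4b")

    # Dataset input: a string starting with "dataset-", plus the column to read from.
    results = sdk.infer("dataset-1234abcd", model="llama-3.1-8b", column="text")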
@@ -793,22 +812,22 @@ class Sutro:
             return
         return response.json()
 
-    def create_stage(self):
+    def create_dataset(self):
         """
-        Create a new stage.
+        Create a new dataset.
 
-        This method creates a new stage and returns its ID.
+        This method creates a new empty dataset and returns its ID.
 
         Returns:
-            str: The ID of the new stage.
+            str: The ID of the new dataset.
         """
-        endpoint = f"{self.base_url}/create-stage"
+        endpoint = f"{self.base_url}/create-dataset"
         headers = {
             "Authorization": f"Key {self.api_key}",
             "Content-Type": "application/json",
         }
         with yaspin(
-            SPINNER, text=to_colored_text("Creating stage"), color=YASPIN_COLOR
+            SPINNER, text=to_colored_text("Creating dataset"), color=YASPIN_COLOR
         ) as spinner:
             response = requests.get(endpoint, headers=headers)
             if response.status_code != 200:
@@ -820,25 +839,25 @@ class Sutro:
                 spinner.stop()
                 print(to_colored_text(response.json(), state="fail"))
                 return
-            stage_id = response.json()["stage_id"]
+            dataset_id = response.json()["dataset_id"]
             spinner.write(
-                to_colored_text(f"✔ Stage created with ID: {stage_id}", state="success")
+                to_colored_text(f"✔ Dataset created with ID: {dataset_id}", state="success")
             )
-            return stage_id
+            return dataset_id
 
-    def upload_to_stage(
+    def upload_to_dataset(
         self,
-        stage_id: Union[List[str], str] = None,
+        dataset_id: Union[List[str], str] = None,
         file_paths: Union[List[str], str] = None,
         verify_ssl: bool = True,
     ):
         """
-        Upload data to a stage.
+        Upload data to a dataset.
 
-        This method uploads files to a stage. Accepts a stage ID and file paths. If only a single parameter is provided, it will be interpreted as the file paths.
+        This method uploads files to a dataset. Accepts a dataset ID and file paths. If only a single parameter is provided, it will be interpreted as the file paths.
 
         Args:
-            stage_id (str): The ID of the stage to upload to. If not provided, a new stage will be created.
+            dataset_id (str): The ID of the dataset to upload to. If not provided, a new dataset will be created.
             file_paths (Union[List[str], str]): A list of paths to the files to upload, or a single path to a collection of files.
             verify_ssl (bool): Whether to verify SSL certificates. Set to False to bypass SSL verification for troubleshooting.
 
@@ -846,17 +865,17 @@ class Sutro:
             dict: The response from the API.
         """
         # when only a single parameter is provided, it is interpreted as the file paths
-        if file_paths is None and stage_id is not None:
-            file_paths = stage_id
-            stage_id = None
+        if file_paths is None and dataset_id is not None:
+            file_paths = dataset_id
+            dataset_id = None
 
         if file_paths is None:
             raise ValueError("File paths must be provided")
 
-        if stage_id is None:
-            stage_id = self.create_stage()
+        if dataset_id is None:
+            dataset_id = self.create_dataset()
 
-        endpoint = f"{self.base_url}/upload-to-stage"
+        endpoint = f"{self.base_url}/upload-to-dataset"
 
         if isinstance(file_paths, str):
             # check if the file path is a directory
@@ -871,7 +890,7 @@ class Sutro:
 
         with yaspin(
             SPINNER,
-            text=to_colored_text(f"Uploading files to stage: {stage_id}"),
+            text=to_colored_text(f"Uploading files to dataset: {dataset_id}"),
             color=YASPIN_COLOR,
         ) as spinner:
             count = 0
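The parameter swap above keeps the old single-argument ergonomics: when only one value is passed, it is treated as the file paths and a dataset is created on the fly. Both call shapes below are valid (paths hypothetical):

    # One argument: interpreted as file_paths; a new dataset is created implicitly.
    dataset_id = sdk.upload_to_dataset("exports/")

    # Two arguments: upload into an existing dataset.
    sdk.upload_to_dataset(dataset_id, "exports/")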
@@ -887,7 +906,7 @@ class Sutro:
                 }
 
                 payload = {
-                    "stage_id": stage_id,
+                    "dataset_id": dataset_id,
                 }
 
                 headers = {
@@ -896,7 +915,7 @@ class Sutro:
                 count += 1
                 spinner.write(
                     to_colored_text(
-                        f"Uploading file {count}/{len(file_paths)} to stage: {stage_id}"
+                        f"Uploading file {count}/{len(file_paths)} to dataset: {dataset_id}"
                     )
                 )
@@ -923,19 +942,19 @@ class Sutro:
 
         spinner.write(
             to_colored_text(
-                f"✔ {count} files successfully uploaded to stage", state="success"
+                f"✔ {count} files successfully uploaded to dataset", state="success"
             )
         )
-        return stage_id
+        return dataset_id
 
-    def list_stages(self):
-        endpoint = f"{self.base_url}/list-stages"
+    def list_datasets(self):
+        endpoint = f"{self.base_url}/list-datasets"
         headers = {
             "Authorization": f"Key {self.api_key}",
             "Content-Type": "application/json",
         }
         with yaspin(
-            SPINNER, text=to_colored_text("Retrieving stages"), color=YASPIN_COLOR
+            SPINNER, text=to_colored_text("Retrieving datasets"), color=YASPIN_COLOR
         ) as spinner:
             response = requests.post(endpoint, headers=headers)
             if response.status_code != 200:
@@ -946,21 +965,21 @@ class Sutro:
                 )
                 print(to_colored_text(f"Error: {response.json()}", state="fail"))
                 return
-            spinner.write(to_colored_text("✔ Stages retrieved", state="success"))
-            return response.json()["stages"]
+            spinner.write(to_colored_text("✔ Datasets retrieved", state="success"))
+            return response.json()["datasets"]
 
-    def list_stage_files(self, stage_id: str):
-        endpoint = f"{self.base_url}/list-stage-files"
+    def list_dataset_files(self, dataset_id: str):
+        endpoint = f"{self.base_url}/list-dataset-files"
         headers = {
             "Authorization": f"Key {self.api_key}",
             "Content-Type": "application/json",
         }
         payload = {
-            "stage_id": stage_id,
+            "dataset_id": dataset_id,
         }
         with yaspin(
             SPINNER,
-            text=to_colored_text(f"Listing files in stage: {stage_id}"),
+            text=to_colored_text(f"Listing files in dataset: {dataset_id}"),
             color=YASPIN_COLOR,
         ) as spinner:
             response = requests.post(
@@ -975,27 +994,27 @@ class Sutro:
             print(to_colored_text(f"Error: {response.json()}", state="fail"))
             return
         spinner.write(
-            to_colored_text(f"✔ Files listed in stage: {stage_id}", state="success")
+            to_colored_text(f"✔ Files listed in dataset: {dataset_id}", state="success")
         )
         return response.json()["files"]
 
-    def download_from_stage(
+    def download_from_dataset(
         self,
-        stage_id: str,
+        dataset_id: str,
         files: Union[List[str], str] = None,
         output_path: str = None,
     ):
-        endpoint = f"{self.base_url}/download-from-stage"
+        endpoint = f"{self.base_url}/download-from-dataset"
 
         if files is None:
-            files = self.list_stage_files(stage_id)
+            files = self.list_dataset_files(dataset_id)
         elif isinstance(files, str):
             files = [files]
 
         if not files:
             print(
                 to_colored_text(
-                    f"Couldn't find files for stage ID: {stage_id}", state="fail"
+                    f"Couldn't find files for dataset ID: {dataset_id}", state="fail"
                 )
             )
             return
@@ -1006,7 +1025,7 @@ class Sutro:
 
         with yaspin(
             SPINNER,
-            text=to_colored_text(f"Downloading files from stage: {stage_id}"),
+            text=to_colored_text(f"Downloading files from dataset: {dataset_id}"),
             color=YASPIN_COLOR,
         ) as spinner:
             count = 0
@@ -1016,11 +1035,11 @@ class Sutro:
                 "Content-Type": "application/json",
             }
             payload = {
-                "stage_id": stage_id,
+                "dataset_id": dataset_id,
                 "file_name": file,
             }
             spinner.text = to_colored_text(
-                f"Downloading file {count + 1}/{len(files)} from stage: {stage_id}"
+                f"Downloading file {count + 1}/{len(files)} from dataset: {dataset_id}"
             )
             response = requests.post(
                 endpoint, headers=headers, data=json.dumps(payload)
@@ -1039,7 +1058,7 @@ class Sutro:
             count += 1
             spinner.write(
                 to_colored_text(
-                    f"✔ {count} files successfully downloaded from stage: {stage_id}",
+                    f"✔ {count} files successfully downloaded from dataset: {dataset_id}",
                     state="success",
                 )
             )
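And the matching retrieval path under its new name; the dataset ID, file name, and output path here are illustrative:

    # Download every file in the dataset to the current working directory.
    sdk.download_from_dataset("dataset-1234abcd")

    # Or fetch a single named file into a chosen folder.
    sdk.download_from_dataset("dataset-1234abcd", files="results.parquet", output_path="out/")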
sutro-0.1.13/PKG-INFO DELETED
@@ -1,41 +0,0 @@
-Metadata-Version: 2.4
-Name: sutro
-Version: 0.1.13
-Summary: Sutro Python SDK
-Project-URL: Homepage, https://sutro.sh
-Project-URL: Documentation, https://docs.sutro.sh
-License-Expression: Apache-2.0
-License-File: LICENSE
-Requires-Python: >=3.10
-Requires-Dist: click==8.1.7
-Requires-Dist: colorama==0.4.4
-Requires-Dist: numpy==2.1.1
-Requires-Dist: pandas==2.2.3
-Requires-Dist: polars==1.8.2
-Requires-Dist: pydantic==2.11.4
-Requires-Dist: requests==2.32.3
-Requires-Dist: tqdm==4.67.1
-Requires-Dist: yaspin==3.1.0
-Description-Content-Type: text/markdown
-
-# sutro-client
-
-The official Python client for Sutro. See [docs.sutro.sh](https://docs.sutro.sh/) for more information.
-
-## Installing Locally (to test changes during development)
-
-Run `make install` from the root directory. This should remove the old builds and reinstall the package in your environment with the latest. You can run `uv pip list` to ensure the package is pointing at the local files instead of the PyPI package.
-
-## Creating releases
-
-Make sure you increment the version appropriately in `pyproject.toml`. Generally speaking we'll do patch versions for small tweaks, minor versions for large additions or changes to behavior, and probably do major releases once it makes sense. Since we're still in beta and `0.x.x` releases, its probably okay to add backwards-incompatible changes to minor releases, but we want to avoid this if possible.
-
-To create a release, run:
-
-`make release <version>` with `<version>` formatted like `0.1.1`
-
-It'll prompt you for an API key to PyPI, which you must have for it to work.
-
-We also have a test PyPI account which you can use to test creating releases before pushing to the actual PyPI hub. I believe you can only create **one** release per version number, so it may be worth testing if you're paranoid about getting it right.
-
-Also make sure to update the docs and increment the docs version number to match the new release. Keeping these consistent will provide a better user experience.
sutro-0.1.13/README.md DELETED
@@ -1,21 +0,0 @@
-# sutro-client
-
-The official Python client for Sutro. See [docs.sutro.sh](https://docs.sutro.sh/) for more information.
-
-## Installing Locally (to test changes during development)
-
-Run `make install` from the root directory. This should remove the old builds and reinstall the package in your environment with the latest. You can run `uv pip list` to ensure the package is pointing at the local files instead of the PyPI package.
-
-## Creating releases
-
-Make sure you increment the version appropriately in `pyproject.toml`. Generally speaking we'll do patch versions for small tweaks, minor versions for large additions or changes to behavior, and probably do major releases once it makes sense. Since we're still in beta and `0.x.x` releases, its probably okay to add backwards-incompatible changes to minor releases, but we want to avoid this if possible.
-
-To create a release, run:
-
-`make release <version>` with `<version>` formatted like `0.1.1`
-
-It'll prompt you for an API key to PyPI, which you must have for it to work.
-
-We also have a test PyPI account which you can use to test creating releases before pushing to the actual PyPI hub. I believe you can only create **one** release per version number, so it may be worth testing if you're paranoid about getting it right.
-
-Also make sure to update the docs and increment the docs version number to match the new release. Keeping these consistent will provide a better user experience.