structifyai 1.174.0__py3-none-any.whl → 1.175.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
structify/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "structify"
4
- __version__ = "1.174.0" # x-release-please-version
4
+ __version__ = "1.175.0" # x-release-please-version
@@ -30,7 +30,7 @@ from ..lib.cost_confirmation import request_cost_confirmation_if_needed
30
30
  from .external_dataframe_proxy import ServicesProxy
31
31
  from ..types.save_requirement_param import RequiredEntity, RequiredProperty
32
32
  from ..types.dataset_descriptor_param import DatasetDescriptorParam
33
- from ..types.structure_run_async_params import SourcePdf, SourceWebWeb
33
+ from ..types.structure_run_async_params import SourceWebWeb
34
34
 
35
35
  __all__ = ["PolarsResource"]
36
36
 
@@ -847,6 +847,16 @@ class PolarsResource(SyncAPIResource):
847
847
 
848
848
  table_param = as_table_param(table_name, schema)
849
849
 
850
+ # Create dataset for this PDF
851
+ dataset_name = f"structure_pdfs_{table_name}_{uuid.uuid4().hex}"
852
+ self._client.datasets.create(
853
+ name=dataset_name,
854
+ description="",
855
+ tables=[table_param],
856
+ relationships=[],
857
+ ephemeral=True,
858
+ )
859
+
850
860
  node_id = get_node_id()
851
861
 
852
862
  # Validate model format if provided as string
@@ -857,9 +867,7 @@ class PolarsResource(SyncAPIResource):
857
867
  "model must be in format 'provider.model_name' (e.g. 'bedrock.claude-sonnet-4-bedrock')"
858
868
  )
859
869
 
860
- # Build lookups for per-row instructions by index (not by path, since the same PDF may appear multiple times)
861
870
  paths_df = document_paths.collect()
862
-
863
871
  instructions_list: list[str | None] = []
864
872
 
865
873
  if instructions is not None and not isinstance(instructions, str):
@@ -868,108 +876,74 @@ class PolarsResource(SyncAPIResource):
868
876
  raise ValueError(f"instructions shape {instr_df.shape} != document_paths shape {paths_df.shape}")
869
877
  instructions_list = cast(List[Optional[str]], instr_df[instr_df.columns[0]].to_list())
870
878
 
871
- def structure_batch(batch_df: pl.DataFrame) -> pl.DataFrame:
872
- # Track by row index since the same PDF may appear multiple times with different instructions
873
- batch_rows = batch_df.with_row_index("__row_idx__").to_dicts()
874
- valid_rows = [row for row in batch_rows if row.get(path_column) is not None]
875
-
876
- if not valid_rows:
877
- return pl.DataFrame(schema=polars_schema)
879
+ # Request cost confirmation before dispatching costly PDF extraction jobs
880
+ if not request_cost_confirmation_if_needed(self._client, paths_df.shape[0]):
881
+ raise Exception(f"User cancelled PDF extraction for {table_name}")
878
882
 
879
- # Request cost confirmation before dispatching costly PDF extraction jobs
880
- if not request_cost_confirmation_if_needed(self._client, len(valid_rows)):
881
- raise Exception(f"User cancelled PDF extraction for {table_name}")
883
+ job_to_pdf_path: dict[str, str] = {}
882
884
 
883
- # Process each PDF document
884
- job_ids: list[str] = []
885
- idx_to_dataset: dict[int, str] = {}
886
-
887
- def process_pdf(row: dict[str, Any]) -> Tuple[str, int, str]:
888
- row_idx = row["__row_idx__"]
889
- pdf_path = row[path_column]
890
- dataset_name = f"structure_pdfs_{table_name}_{uuid.uuid4().hex}"
891
-
892
- # Create dataset for this PDF
893
- self._client.datasets.create(
894
- name=dataset_name,
895
- description="",
896
- tables=[table_param],
897
- relationships=[],
898
- ephemeral=True,
899
- )
900
-
901
- # Upload the PDF document
902
- with open(pdf_path, "rb") as pdf_file:
885
+ # Process each PDF document
886
+ def process_pdf(pdf_path: str, instructions: str | None) -> Tuple[List[str], str]:
887
+ # Upload the PDF document
888
+ unique_pdf_name = f"{uuid.uuid4().hex}.pdf"
889
+ with open(pdf_path, "rb") as pdf_file:
890
+ try:
903
891
  self._client.documents.upload(
904
892
  content=pdf_file,
905
893
  file_type="PDF",
906
894
  dataset=dataset_name,
907
- path=f"{dataset_name}.pdf".encode(),
895
+ path=unique_pdf_name.encode(),
908
896
  )
909
-
910
- # Get per-row instructions and model
897
+ except Exception as e:
898
+ if "Document already exists" not in str(e):
899
+ raise e
900
+
901
+ job_ids = self._client.structure.pdf(
902
+ dataset=dataset_name,
903
+ path=unique_pdf_name,
904
+ node_id=node_id,
905
+ instructions=instructions,
906
+ mode="Single" if mode == "single" else "Batch",
907
+ model=model,
908
+ ).job_ids
909
+ return job_ids, pdf_path
910
+
911
+ with ThreadPoolExecutor(max_workers=MAX_PARALLEL_REQUESTS) as executor:
912
+ futures: List[Future[Tuple[List[str], str]]] = []
913
+ for i in range(paths_df.shape[0]):
914
+ path: str | None = paths_df[path_column][i]
911
915
  pdf_instructions: str | None = None
912
916
  if isinstance(instructions, str):
913
917
  pdf_instructions = instructions
914
918
  elif instructions_list:
915
- pdf_instructions = cast(Optional[str], instructions_list[row_idx])
916
- elif conditioning:
919
+ pdf_instructions = instructions_list[i]
920
+ if pdf_instructions is None and conditioning:
917
921
  pdf_instructions = conditioning
922
+ if path is not None:
923
+ futures.append(executor.submit(process_pdf, path, pdf_instructions))
924
+ for future in tqdm(as_completed(futures), total=len(futures), desc="Preparing PDFs"):
925
+ job_ids, pdf_path = future.result()
926
+ for job_id in job_ids:
927
+ job_to_pdf_path[job_id] = pdf_path
928
+
929
+ # Wait for all PDF processing jobs to complete
930
+ self._client.jobs.wait_for_jobs(dataset_name=dataset_name, title=f"Parsing PDFs", node_id=node_id)
931
+
932
+ # Get all of the entities with their job_ids
933
+ entities = self._client.datasets.view_table(dataset=dataset_name, name=table_name)
934
+ structured_results: List[Dict[str, Any]] = [
935
+ {**entity.properties, path_column: job_to_pdf_path[entity.job_ids[0]]} for entity in entities
936
+ ]
918
937
 
919
- job_id = self._client.structure.run_async(
920
- dataset=dataset_name,
921
- source=SourcePdf(pdf={"path": f"{dataset_name}.pdf", "single_agent": mode == "single"}),
922
- node_id=node_id,
923
- instructions=pdf_instructions,
924
- model=model,
925
- )
926
- return job_id, row_idx, dataset_name
927
-
928
- with ThreadPoolExecutor(max_workers=MAX_PARALLEL_REQUESTS) as executor:
929
- futures = [executor.submit(process_pdf, row) for row in valid_rows]
930
- for future in tqdm(as_completed(futures), total=len(futures), desc="Preparing PDFs"):
931
- job_id, row_idx, dataset_name = future.result()
932
- job_ids.append(job_id)
933
- idx_to_dataset[row_idx] = dataset_name
934
-
935
- # Wait for all PDF processing jobs to complete
936
- self._client.jobs.wait_for_jobs(job_ids=job_ids, title=f"Parsing {table_name} from PDFs", node_id=node_id)
937
-
938
- # Collect results from all processed PDFs - each result is tagged with its source row_idx
939
- structured_results: list[dict[str, Any]] = []
940
-
941
- def collect_pdf_results(row_idx: int, dataset_name: str) -> List[Dict[str, Any]]:
942
- pdf_path = batch_rows[row_idx][path_column]
943
- entities_result = self._client.datasets.view_table(dataset=dataset_name, name=table_name)
944
- return [
945
- {**entity.properties, path_column: pdf_path, "__row_idx__": row_idx} for entity in entities_result
946
- ]
947
-
948
- with ThreadPoolExecutor(max_workers=MAX_PARALLEL_REQUESTS) as executor:
949
- collect_futures = [
950
- executor.submit(collect_pdf_results, row_idx, dataset_name)
951
- for row_idx, dataset_name in idx_to_dataset.items()
952
- ]
953
- for future in tqdm(
954
- as_completed(collect_futures), total=len(collect_futures), desc="Collecting PDF extractions"
955
- ):
956
- results = future.result()
957
- structured_results.extend(results)
958
-
959
- # Ensure all columns are present with None for missing values
960
- for result_row in structured_results:
961
- for col_name in polars_schema.names():
962
- if col_name not in result_row:
963
- result_row[col_name] = None
964
-
965
- if not structured_results:
966
- return pl.DataFrame(schema=polars_schema)
967
-
968
- # Build result dataframe directly from structured_results without joining
969
- # Each entity is already tagged with path_column from its source PDF
970
- return pl.DataFrame(structured_results, schema=polars_schema)
938
+ # Ensure all columns are present with None for missing values
939
+ for result_row in structured_results:
940
+ for col_name in polars_schema.names():
941
+ if col_name not in result_row:
942
+ result_row[col_name] = None
971
943
 
972
- return document_paths.map_batches(structure_batch, schema=polars_schema, no_optimizations=True)
944
+ # Build result dataframe directly from structured_results without joining
945
+ # Each entity is already tagged with path_column from its source PDF
946
+ return pl.DataFrame(structured_results, schema=polars_schema).lazy()
973
947
 
974
948
  def tag(
975
949
  self,
@@ -3,10 +3,12 @@
3
3
  from __future__ import annotations
4
4
 
5
5
  from typing import Iterable, Optional
6
+ from typing_extensions import Literal
6
7
 
7
8
  import httpx
8
9
 
9
10
  from ..types import (
11
+ structure_pdf_params,
10
12
  structure_run_async_params,
11
13
  structure_job_status_params,
12
14
  structure_enhance_property_params,
@@ -26,6 +28,7 @@ from .._response import (
26
28
  from .._base_client import make_request_options
27
29
  from ..types.knowledge_graph_param import KnowledgeGraphParam
28
30
  from ..types.save_requirement_param import SaveRequirementParam
31
+ from ..types.structure_pdf_response import StructurePdfResponse
29
32
  from ..types.structure_job_status_response import StructureJobStatusResponse
30
33
 
31
34
  __all__ = ["StructureResource", "AsyncStructureResource"]
@@ -267,6 +270,55 @@ class StructureResource(SyncAPIResource):
267
270
  cast_to=StructureJobStatusResponse,
268
271
  )
269
272
 
273
+ def pdf(
274
+ self,
275
+ *,
276
+ dataset: str,
277
+ path: str,
278
+ instructions: Optional[str] | Omit = omit,
279
+ mode: Literal["Single", "Batch"] | Omit = omit,
280
+ model: Optional[str] | Omit = omit,
281
+ node_id: Optional[str] | Omit = omit,
282
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
283
+ # The extra values given here take precedence over values defined on the client or passed to this method.
284
+ extra_headers: Headers | None = None,
285
+ extra_query: Query | None = None,
286
+ extra_body: Body | None = None,
287
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
288
+ ) -> StructurePdfResponse:
289
+ """For single mode, creates one job for the entire PDF.
290
+
291
+ For batch mode, creates one
292
+ job per PDF page.
293
+
294
+ Args:
295
+ extra_headers: Send extra headers
296
+
297
+ extra_query: Add additional query parameters to the request
298
+
299
+ extra_body: Add additional JSON properties to the request
300
+
301
+ timeout: Override the client-level default timeout for this request, in seconds
302
+ """
303
+ return self._post(
304
+ "/structure/pdf",
305
+ body=maybe_transform(
306
+ {
307
+ "dataset": dataset,
308
+ "path": path,
309
+ "instructions": instructions,
310
+ "mode": mode,
311
+ "model": model,
312
+ "node_id": node_id,
313
+ },
314
+ structure_pdf_params.StructurePdfParams,
315
+ ),
316
+ options=make_request_options(
317
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
318
+ ),
319
+ cast_to=StructurePdfResponse,
320
+ )
321
+
270
322
  def run_async(
271
323
  self,
272
324
  *,
@@ -560,6 +612,55 @@ class AsyncStructureResource(AsyncAPIResource):
560
612
  cast_to=StructureJobStatusResponse,
561
613
  )
562
614
 
615
+ async def pdf(
616
+ self,
617
+ *,
618
+ dataset: str,
619
+ path: str,
620
+ instructions: Optional[str] | Omit = omit,
621
+ mode: Literal["Single", "Batch"] | Omit = omit,
622
+ model: Optional[str] | Omit = omit,
623
+ node_id: Optional[str] | Omit = omit,
624
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
625
+ # The extra values given here take precedence over values defined on the client or passed to this method.
626
+ extra_headers: Headers | None = None,
627
+ extra_query: Query | None = None,
628
+ extra_body: Body | None = None,
629
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
630
+ ) -> StructurePdfResponse:
631
+ """For single mode, creates one job for the entire PDF.
632
+
633
+ For batch mode, creates one
634
+ job per PDF page.
635
+
636
+ Args:
637
+ extra_headers: Send extra headers
638
+
639
+ extra_query: Add additional query parameters to the request
640
+
641
+ extra_body: Add additional JSON properties to the request
642
+
643
+ timeout: Override the client-level default timeout for this request, in seconds
644
+ """
645
+ return await self._post(
646
+ "/structure/pdf",
647
+ body=await async_maybe_transform(
648
+ {
649
+ "dataset": dataset,
650
+ "path": path,
651
+ "instructions": instructions,
652
+ "mode": mode,
653
+ "model": model,
654
+ "node_id": node_id,
655
+ },
656
+ structure_pdf_params.StructurePdfParams,
657
+ ),
658
+ options=make_request_options(
659
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
660
+ ),
661
+ cast_to=StructurePdfResponse,
662
+ )
663
+
563
664
  async def run_async(
564
665
  self,
565
666
  *,
@@ -636,6 +737,9 @@ class StructureResourceWithRawResponse:
636
737
  self.job_status = to_raw_response_wrapper(
637
738
  structure.job_status,
638
739
  )
740
+ self.pdf = to_raw_response_wrapper(
741
+ structure.pdf,
742
+ )
639
743
  self.run_async = to_raw_response_wrapper(
640
744
  structure.run_async,
641
745
  )
@@ -660,6 +764,9 @@ class AsyncStructureResourceWithRawResponse:
660
764
  self.job_status = async_to_raw_response_wrapper(
661
765
  structure.job_status,
662
766
  )
767
+ self.pdf = async_to_raw_response_wrapper(
768
+ structure.pdf,
769
+ )
663
770
  self.run_async = async_to_raw_response_wrapper(
664
771
  structure.run_async,
665
772
  )
@@ -684,6 +791,9 @@ class StructureResourceWithStreamingResponse:
684
791
  self.job_status = to_streamed_response_wrapper(
685
792
  structure.job_status,
686
793
  )
794
+ self.pdf = to_streamed_response_wrapper(
795
+ structure.pdf,
796
+ )
687
797
  self.run_async = to_streamed_response_wrapper(
688
798
  structure.run_async,
689
799
  )
@@ -708,6 +818,9 @@ class AsyncStructureResourceWithStreamingResponse:
708
818
  self.job_status = async_to_streamed_response_wrapper(
709
819
  structure.job_status,
710
820
  )
821
+ self.pdf = async_to_streamed_response_wrapper(
822
+ structure.pdf,
823
+ )
711
824
  self.run_async = async_to_streamed_response_wrapper(
712
825
  structure.run_async,
713
826
  )
@@ -108,6 +108,41 @@ class WhitelabelResource(SyncAPIResource):
108
108
  cast_to=NoneType,
109
109
  )
110
110
 
111
+ def proxy_post(
112
+ self,
113
+ path: str,
114
+ *,
115
+ service: str,
116
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
117
+ # The extra values given here take precedence over values defined on the client or passed to this method.
118
+ extra_headers: Headers | None = None,
119
+ extra_query: Query | None = None,
120
+ extra_body: Body | None = None,
121
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
122
+ ) -> None:
123
+ """
124
+ Args:
125
+ extra_headers: Send extra headers
126
+
127
+ extra_query: Add additional query parameters to the request
128
+
129
+ extra_body: Add additional JSON properties to the request
130
+
131
+ timeout: Override the client-level default timeout for this request, in seconds
132
+ """
133
+ if not service:
134
+ raise ValueError(f"Expected a non-empty value for `service` but received {service!r}")
135
+ if not path:
136
+ raise ValueError(f"Expected a non-empty value for `path` but received {path!r}")
137
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
138
+ return self._post(
139
+ f"/whitelabel/{service}/{path}",
140
+ options=make_request_options(
141
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
142
+ ),
143
+ cast_to=NoneType,
144
+ )
145
+
111
146
 
112
147
  class AsyncWhitelabelResource(AsyncAPIResource):
113
148
  @cached_property
@@ -198,6 +233,41 @@ class AsyncWhitelabelResource(AsyncAPIResource):
198
233
  cast_to=NoneType,
199
234
  )
200
235
 
236
+ async def proxy_post(
237
+ self,
238
+ path: str,
239
+ *,
240
+ service: str,
241
+ # Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
242
+ # The extra values given here take precedence over values defined on the client or passed to this method.
243
+ extra_headers: Headers | None = None,
244
+ extra_query: Query | None = None,
245
+ extra_body: Body | None = None,
246
+ timeout: float | httpx.Timeout | None | NotGiven = not_given,
247
+ ) -> None:
248
+ """
249
+ Args:
250
+ extra_headers: Send extra headers
251
+
252
+ extra_query: Add additional query parameters to the request
253
+
254
+ extra_body: Add additional JSON properties to the request
255
+
256
+ timeout: Override the client-level default timeout for this request, in seconds
257
+ """
258
+ if not service:
259
+ raise ValueError(f"Expected a non-empty value for `service` but received {service!r}")
260
+ if not path:
261
+ raise ValueError(f"Expected a non-empty value for `path` but received {path!r}")
262
+ extra_headers = {"Accept": "*/*", **(extra_headers or {})}
263
+ return await self._post(
264
+ f"/whitelabel/{service}/{path}",
265
+ options=make_request_options(
266
+ extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
267
+ ),
268
+ cast_to=NoneType,
269
+ )
270
+
201
271
 
202
272
  class WhitelabelResourceWithRawResponse:
203
273
  def __init__(self, whitelabel: WhitelabelResource) -> None:
@@ -209,6 +279,9 @@ class WhitelabelResourceWithRawResponse:
209
279
  self.proxy_get = to_raw_response_wrapper(
210
280
  whitelabel.proxy_get,
211
281
  )
282
+ self.proxy_post = to_raw_response_wrapper(
283
+ whitelabel.proxy_post,
284
+ )
212
285
 
213
286
 
214
287
  class AsyncWhitelabelResourceWithRawResponse:
@@ -221,6 +294,9 @@ class AsyncWhitelabelResourceWithRawResponse:
221
294
  self.proxy_get = async_to_raw_response_wrapper(
222
295
  whitelabel.proxy_get,
223
296
  )
297
+ self.proxy_post = async_to_raw_response_wrapper(
298
+ whitelabel.proxy_post,
299
+ )
224
300
 
225
301
 
226
302
  class WhitelabelResourceWithStreamingResponse:
@@ -233,6 +309,9 @@ class WhitelabelResourceWithStreamingResponse:
233
309
  self.proxy_get = to_streamed_response_wrapper(
234
310
  whitelabel.proxy_get,
235
311
  )
312
+ self.proxy_post = to_streamed_response_wrapper(
313
+ whitelabel.proxy_post,
314
+ )
236
315
 
237
316
 
238
317
  class AsyncWhitelabelResourceWithStreamingResponse:
@@ -245,3 +324,6 @@ class AsyncWhitelabelResourceWithStreamingResponse:
245
324
  self.proxy_get = async_to_streamed_response_wrapper(
246
325
  whitelabel.proxy_get,
247
326
  )
327
+ self.proxy_post = async_to_streamed_response_wrapper(
328
+ whitelabel.proxy_post,
329
+ )
@@ -111,6 +111,7 @@ from .scrape_list_response import ScrapeListResponse as ScrapeListResponse
111
111
  from .scrape_scrape_params import ScrapeScrapeParams as ScrapeScrapeParams
112
112
  from .select_team_response import SelectTeamResponse as SelectTeamResponse
113
113
  from .source_list_response import SourceListResponse as SourceListResponse
114
+ from .structure_pdf_params import StructurePdfParams as StructurePdfParams
114
115
  from .update_team_response import UpdateTeamResponse as UpdateTeamResponse
115
116
  from .workflow_stop_params import WorkflowStopParams as WorkflowStopParams
116
117
  from .connector_auth_method import ConnectorAuthMethod as ConnectorAuthMethod
@@ -148,6 +149,7 @@ from .list_projects_response import ListProjectsResponse as ListProjectsResponse
148
149
  from .remove_member_response import RemoveMemberResponse as RemoveMemberResponse
149
150
  from .save_requirement_param import SaveRequirementParam as SaveRequirementParam
150
151
  from .scrape_scrape_response import ScrapeScrapeResponse as ScrapeScrapeResponse
152
+ from .structure_pdf_response import StructurePdfResponse as StructurePdfResponse
151
153
  from .team_add_member_params import TeamAddMemberParams as TeamAddMemberParams
152
154
  from .workflow_schedule_info import WorkflowScheduleInfo as WorkflowScheduleInfo
153
155
  from .connector_create_params import ConnectorCreateParams as ConnectorCreateParams
@@ -35,7 +35,7 @@ class ParametersStructuringInputAgentAgentPdfPdf(BaseModel):
35
35
 
36
36
  path: str
37
37
 
38
- single_agent: Optional[bool] = None
38
+ page: Optional[int] = None
39
39
 
40
40
 
41
41
  class ParametersStructuringInputAgentAgentPdf(BaseModel):
@@ -1,6 +1,6 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
- from typing import Dict, Union, Optional
3
+ from typing import Dict, List, Union, Optional
4
4
  from datetime import datetime
5
5
  from typing_extensions import Literal, TypeAlias
6
6
 
@@ -103,6 +103,8 @@ class DatasetViewTableResponse(BaseModel):
103
103
 
104
104
  dataset_id: str
105
105
 
106
+ job_ids: List[str]
107
+
106
108
  label: str
107
109
 
108
110
  properties: Dict[str, Properties]
@@ -36,7 +36,7 @@ class ParametersStructuringInputAgentAgentPdfPdf(BaseModel):
36
36
 
37
37
  path: str
38
38
 
39
- single_agent: Optional[bool] = None
39
+ page: Optional[int] = None
40
40
 
41
41
 
42
42
  class ParametersStructuringInputAgentAgentPdf(BaseModel):
@@ -0,0 +1,22 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from __future__ import annotations
4
+
5
+ from typing import Optional
6
+ from typing_extensions import Literal, Required, TypedDict
7
+
8
+ __all__ = ["StructurePdfParams"]
9
+
10
+
11
+ class StructurePdfParams(TypedDict, total=False):
12
+ dataset: Required[str]
13
+
14
+ path: Required[str]
15
+
16
+ instructions: Optional[str]
17
+
18
+ mode: Literal["Single", "Batch"]
19
+
20
+ model: Optional[str]
21
+
22
+ node_id: Optional[str]
@@ -0,0 +1,11 @@
1
+ # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
+
3
+ from typing import List
4
+
5
+ from .._models import BaseModel
6
+
7
+ __all__ = ["StructurePdfResponse"]
8
+
9
+
10
+ class StructurePdfResponse(BaseModel):
11
+ job_ids: List[str]
@@ -40,7 +40,7 @@ class SourcePdfPdf(TypedDict, total=False):
40
40
 
41
41
  path: Required[str]
42
42
 
43
- single_agent: bool
43
+ page: Optional[int]
44
44
 
45
45
 
46
46
  class SourcePdf(TypedDict, total=False):
@@ -5,5 +5,16 @@ from typing_extensions import Literal, TypeAlias
5
5
  __all__ = ["UsageGroupKey"]
6
6
 
7
7
  UsageGroupKey: TypeAlias = Literal[
8
- "web", "pdf", "derive", "scrape", "apollo", "searchapi", "newsapi", "match", "connectorexplore", "other"
8
+ "web",
9
+ "pdf",
10
+ "derive",
11
+ "scrape",
12
+ "apollo",
13
+ "searchapi",
14
+ "newsapi",
15
+ "secapi",
16
+ "cufinder",
17
+ "match",
18
+ "connectorexplore",
19
+ "other",
9
20
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: structifyai
3
- Version: 1.174.0
3
+ Version: 1.175.0
4
4
  Summary: The official Python library for the structify API
5
5
  Project-URL: Homepage, https://github.com/StructifyAI/structify-python
6
6
  Project-URL: Repository, https://github.com/StructifyAI/structify-python
@@ -11,7 +11,7 @@ structify/_resource.py,sha256=tJi4pDQooQZ_zJwEwrLj-U-ye2hC-cbmr1GzIwCT10Y,1118
11
11
  structify/_response.py,sha256=RuNhMDiZUdPqEbmFJHDVI4FMPDszk8QjK9LVWm1Fagk,28806
12
12
  structify/_streaming.py,sha256=n4C9M7ITmANYn9LaWHNoqJdIIyF7svLco2qst7u3M7U,10233
13
13
  structify/_types.py,sha256=jj4p-m3vpUma0AdhPWIaljHZXeb4RKnrAusjVdpDy5Y,7597
14
- structify/_version.py,sha256=jAkquXwE8e7XiNAPLSl0tNkX5WyP7kZ2giJmBdnOmlM,163
14
+ structify/_version.py,sha256=dFWqiyi95rQIGV0JregKsPLWuKqIKL7smQzi9sUXYwo,163
15
15
  structify/pagination.py,sha256=ycybhWcpKk4ztsMcCA6C0WZiJejGrSx6bSr8LLskJUY,4346
16
16
  structify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  structify/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
@@ -39,7 +39,7 @@ structify/resources/external_dataframe_proxy.py,sha256=DSn0YwWIembR__ZtDxVCJtyY3
39
39
  structify/resources/jobs.py,sha256=gO1aSByi1dMvW90UDsMmNhLHFCOY4ENLkZcAx4gbLHY,30108
40
40
  structify/resources/match.py,sha256=gDWEWnKwEoLbheQAMFltJCk2ysa_L9AuJMOaauM7c4Y,12248
41
41
  structify/resources/nango.py,sha256=Zl0M1XhlVe61jHVd-SdYI9uEbEhIRmskvlk7Xp0Lh8g,9166
42
- structify/resources/polars.py,sha256=--kVVUoJ7AvGirnc462r0TRsK7lcXXDQD0XWhqtG3K8,61552
42
+ structify/resources/polars.py,sha256=KFeiRPCVGIssbFdTnGtTUEXNW46j6nHzmHN5rPeqhRA,59995
43
43
  structify/resources/projects.py,sha256=YDikBDB9D1EXyZ2GyRx4GlpQ83snw51YlNuU1sLHqho,14117
44
44
  structify/resources/public_sessions.py,sha256=_JmssE0MMjeGdxT0FWtrkcceSV4skeEkVGYeO2FkJes,9976
45
45
  structify/resources/sandbox.py,sha256=Gc7uxZAOmbXA477UHvS244BokI1Tte_34xxIj0dC5PA,16984
@@ -48,9 +48,9 @@ structify/resources/server.py,sha256=39G3yhj3NYWgnLq0PdS86lwDvXK6MIWAIl7G0ZLMJro
48
48
  structify/resources/sessions.py,sha256=xNKjg_H9JoZenCnMJJOURBa2bmXquwweqehCF4hGMlY,65863
49
49
  structify/resources/slack.py,sha256=CSMW3Eo-XRjISvNOgAcGyBdnmI9SdkQplE0P0lqTnjM,13082
50
50
  structify/resources/sources.py,sha256=K5jLPKu3LNH5vKL9V1NQlHaG8UI982H74enN_cjdf-0,13181
51
- structify/resources/structure.py,sha256=VMk10L7npByPFtSrddvVNPcGP-iE8PPO_4i2JctYp3A,28974
51
+ structify/resources/structure.py,sha256=jXcVdUuNc6Q-HwUhSDcsJONbBqRf_FcPAjNtjXYxrdo,33124
52
52
  structify/resources/teams.py,sha256=W1Isf2lXDLB-xdzgR8qJwCrziElOY25OcsQfIBKQ1Sc,55575
53
- structify/resources/whitelabel.py,sha256=owhs0tsBtuR9yS7H33Kf3dvbiB7kRlOIXVbLt-7Wx2Q,9579
53
+ structify/resources/whitelabel.py,sha256=0TQ3HJccdflpAVcHqydizT4RznkRvZBRP0--0szjDps,12865
54
54
  structify/resources/whitelabel_service.py,sha256=cqGBpRegtJRjt5WFNPCXTwyGKHTAbBUrhJP1UroEhVQ,9439
55
55
  structify/resources/wiki.py,sha256=0j5qrgQ72LE7GsuLyy4nylnEuU0VDSjTMRejaHlqa-8,19490
56
56
  structify/resources/workflow.py,sha256=D8gnt22usKoY9iA3HTNlbQyWTykNUXl2m3rFsPAKZZc,9273
@@ -77,7 +77,7 @@ structify/resources/user/__init__.py,sha256=vkGsM7uGlBXVVwuEZlICVLlIY3F75n_MIMpm
77
77
  structify/resources/user/api_keys.py,sha256=LgKvDGkHPGIumWmEsPCA1XYAKNuRN48nqG6anv8AVHE,14331
78
78
  structify/resources/user/stripe.py,sha256=MazkGQ5VuNY7TxMPXxJULBVerMGmOXaEcXpfQsUYoNs,13574
79
79
  structify/resources/user/user.py,sha256=Hk8D5KB8in-A9eJSYFIosILz4HgSWDwDDAVIQeQ8Sew,25411
80
- structify/types/__init__.py,sha256=-lfSCQdu7PvLm8aF4T-si-zkz6wxQsDFAmAu_GFxanQ,28089
80
+ structify/types/__init__.py,sha256=-qTlAuUWBfkvyS4M2A3X4CzLDOps2JQa_x5PLIwTcHk,28245
81
81
  structify/types/accept_invitation_response.py,sha256=hWIn0Sscg7U0uOqoZA5SG8ENZQuM0tsbgOlrJbcW37s,260
82
82
  structify/types/add_member_response.py,sha256=_YjqwGpsfdR9Dw6SAQ99TveFSD8Hov1IjVPMH2KC3CM,882
83
83
  structify/types/admin_grant_access_response.py,sha256=xCPdmejgZs1oXOsfgbW-cYKZa8getHRotRnsCav5QPU,390
@@ -181,7 +181,7 @@ structify/types/dataset_update_relationship_params.py,sha256=vSCxeVJM03JM4DlXmKp
181
181
  structify/types/dataset_view_relationships_params.py,sha256=Pptxz8SRA6-qFriarOLylrvIdbQyV7rqO_MVGvP4FjA,1001
182
182
  structify/types/dataset_view_relationships_response.py,sha256=IC5Ot90K_F7wSPBSz7rfIkw6nF8paVz1tIL-C37IiUw,1866
183
183
  structify/types/dataset_view_table_params.py,sha256=vD_qKTv13_mbBP4zIorOSO--RL2qp5pQRxXPdwPBuio,985
184
- structify/types/dataset_view_table_response.py,sha256=DsEctnRL0w2pzzDFHU18bIeBhD5bIalXVoLRe3M9Hro,1816
184
+ structify/types/dataset_view_table_response.py,sha256=Ultse5-ichgV1vnD0UJmfslMpEqQlnqPmRmQYId0gkA,1846
185
185
  structify/types/dataset_view_tables_with_relationships_params.py,sha256=0qCpvobbsnHIij0Z4nAJfDPl_bf8zGoER-3Cmve7Yxw,1021
186
186
  structify/types/dataset_view_tables_with_relationships_response.py,sha256=4ICbKqXwWfYrq7N-ZvlFY4kZCwVdo9k65NMaIL4KWZA,5670
187
187
  structify/types/delete_chat_session_response.py,sha256=HuoT7Z2lgj4Elha7SBWf7dBamo-vjoGY5kedaoAYgUw,292
@@ -263,7 +263,7 @@ structify/types/job_event_body.py,sha256=pO11fTm5sGvQ6cx7NvWTitUaunSqdRo-d5tkyh4
263
263
  structify/types/job_get_scrapers_response.py,sha256=-E9DaG9YCkrJeuffZ3RiI6MW7ZsbPjmlVU3jUQdggSY,694
264
264
  structify/types/job_get_source_entities_response.py,sha256=vlGKFkMKjII48EexIEBbls6-PAVYDDkmR1__aKjuiUo,3379
265
265
  structify/types/job_list_params.py,sha256=i_MYi4vIFC6Dq5I4E4htYwZejqRE5qAf2lgg_SRBuKg,1012
266
- structify/types/job_list_response.py,sha256=OMDUmcntraqTS8B-h7WqREgqbRVClK2a8ah4tFZnEgQ,5143
266
+ structify/types/job_list_response.py,sha256=mKWo7HlbMFtsGMjQHLwT8SCuV_Ktmf6pIAqoPx9a5HU,5134
267
267
  structify/types/job_status_params.py,sha256=wx7NFv1gJaIsy_fmCdPeEsVblIFRN5P1cUP1q8210HA,412
268
268
  structify/types/job_status_response.py,sha256=xcfeCXfxVOFQStG5W0Xm6Cszv2k6u-VKjb2UDfz_McE,277
269
269
  structify/types/knowledge_graph.py,sha256=HskWSN4Kw6nhwmIN6vZTNihKuYgztuDuAs2sZMhih90,607
@@ -347,7 +347,9 @@ structify/types/structure_is_complete_params.py,sha256=foIrU7dW5seHipqVl2f-Leb7n
347
347
  structify/types/structure_is_complete_response.py,sha256=YWZrs4YK7EyjdZk1GAfb9cFwJS9HCJ0mW52uxesGqu8,216
348
348
  structify/types/structure_job_status_params.py,sha256=uQmvkdZQ8c3XSAZSmshm0X7iDfyAKztzofziXSveWIE,479
349
349
  structify/types/structure_job_status_response.py,sha256=jZVwojhMJB7w7BwaMQH5W_lqlzb-fKd2SKo6v1xrW-A,806
350
- structify/types/structure_run_async_params.py,sha256=s3NONQF-Bwl9Iu8YKoiHJfNcoM_Z5C_byKHCAMfYiI8,1818
350
+ structify/types/structure_pdf_params.py,sha256=wzVxFvV0g-CZYtAjtqN5TIqLq6cUMf0B5sAzEENRvkg,473
351
+ structify/types/structure_pdf_response.py,sha256=WZMCq0HhYSbxkB8yLbfMJ0urYuWTPyTAiQdUNcLbLck,244
352
+ structify/types/structure_run_async_params.py,sha256=w7BwDOjGMkSRZTf5f84nQNFQ5kKM_RZjXomS1wvfjXI,1819
351
353
  structify/types/structure_run_async_response.py,sha256=RTK4Vh2A-51560FjLrRJiqKNgBjnuq3hd671_hi5lHU,212
352
354
  structify/types/survey_submission_response.py,sha256=OTjhVLOGQWATs7VEsn1_OVNxu5oKOS_WwSu82UhmtbE,240
353
355
  structify/types/table.py,sha256=Zx9V6jSUeCYAOn3ghA5xejTXVaplVm6Mew_qOVHxkIY,915
@@ -374,7 +376,7 @@ structify/types/update_member_role_response.py,sha256=q1jTM0lFmzvH8ki7GcSQAh-5cN
374
376
  structify/types/update_table_response.py,sha256=ypJEzW8JcLOmLpoZq46i7ADFUkHqBGeo0AhtiSq8HAk,1156
375
377
  structify/types/update_team_response.py,sha256=AOm2-jzpjF3UVRh3msyc8W00l-hNpk_SqmMI3oK_YBE,230
376
378
  structify/types/update_visibility_response.py,sha256=veHlq90QKDCiY__DLHGQQloyZhxy6wSJ_6bPn18bisY,279
377
- structify/types/usage_group_key.py,sha256=a0Fc4_raXvnASGPL9XSbMTsTq9uQxQEYFDJ-Jn2SMu0,313
379
+ structify/types/usage_group_key.py,sha256=vOoCW73KNK4BG6KXLPCF9D7J6JjJBssFvhDdDPlkHpo,380
378
380
  structify/types/user_enrich_params.py,sha256=bXOLnyPxG-iqFk1KZCjRGpcy8osRs6QYEVLtN_7HXf8,280
379
381
  structify/types/user_info.py,sha256=L2FckSvAPuRpgwNgwOoYF2WYwHrHmuDDAehq3-EeUWQ,1562
380
382
  structify/types/user_refresh_params.py,sha256=_YdNJ1SZg78JkhfLGiuMq-NsN7zAlF0DG3GvDaaMOpY,324
@@ -405,7 +407,7 @@ structify/types/workflow_stop_params.py,sha256=DLTGDSENmcdNQoV_LnsL0lvf3i3Ntupkp
405
407
  structify/types/admin/__init__.py,sha256=sI2YUoXg3jsKmOp7cu3j_z2PoGhwQ5x7aTf822d192k,3540
406
408
  structify/types/admin/admin_dataset_return.py,sha256=hRSCIBTiaCb1O1lcxsKyxnzGiYuIUEvnMxXkNbyj4_U,495
407
409
  structify/types/admin/admin_delete_jobs_response.py,sha256=c3IO7bCqAoEN9PHYM88gO0JT0e6hCaHyIBtxC84Tw38,225
408
- structify/types/admin/admin_list_jobs_response.py,sha256=2V64gFWFH3scKfj0P_Lo3d_YxoORoLdrm9sUPSwI6S0,4876
410
+ structify/types/admin/admin_list_jobs_response.py,sha256=27IoyuBSaBJPnSSyiZt3q5dHfeyBzrGtnq7nGvJcapM,4867
409
411
  structify/types/admin/admin_sandbox.py,sha256=ADrK4ugEA85vNCzKe7OYr8tzkf5nQWnnACfzmyBATvI,679
410
412
  structify/types/admin/admin_teams_list_response.py,sha256=FDrXxx3dQKuHuLfByYzaS5EzZYkZAtyJH7E7t6UoUX4,1167
411
413
  structify/types/admin/cancel_subscription_response.py,sha256=ph43hCKHtpjp2eSV9X9eitJA-mpnrRdjwe8KxPLJdKY,246
@@ -489,7 +491,7 @@ structify/types/user/stripe_create_portal_session_params.py,sha256=5AYRC8z_SlKmd
489
491
  structify/types/user/stripe_create_session_params.py,sha256=DFcNLNzEWeupkGQ9J5PafsuL_bIU9cLEIhAmFPsRlfo,387
490
492
  structify/types/user/stripe_create_subscription_params.py,sha256=d8HfiC94gJbG-cC_WvBz6xYCvxKJO_EP2yyVmVvufrU,424
491
493
  structify/types/user/subscription_plan.py,sha256=qKJMM-zPpYolYC1DlypOwPpxlyJBLkQqFK_0VpwktJs,222
492
- structifyai-1.174.0.dist-info/METADATA,sha256=_cBl4jOw0K_TtGioyYQoTH0ezXr8x4NPfipk2wWlzVk,16399
493
- structifyai-1.174.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
494
- structifyai-1.174.0.dist-info/licenses/LICENSE,sha256=9CwgrmGz3rZSTT-KqGc1gua-7g8B4ThTgMtUgPALh5c,11339
495
- structifyai-1.174.0.dist-info/RECORD,,
494
+ structifyai-1.175.0.dist-info/METADATA,sha256=tnYJNNHg3u1mib1jjY1qNCerum6ifMs8NTMJuymRUBo,16399
495
+ structifyai-1.175.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
496
+ structifyai-1.175.0.dist-info/licenses/LICENSE,sha256=9CwgrmGz3rZSTT-KqGc1gua-7g8B4ThTgMtUgPALh5c,11339
497
+ structifyai-1.175.0.dist-info/RECORD,,