structifyai 1.172.0__py3-none-any.whl → 1.174.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
structify/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "structify"
4
- __version__ = "1.172.0" # x-release-please-version
4
+ __version__ = "1.174.0" # x-release-please-version
@@ -935,20 +935,24 @@ class PolarsResource(SyncAPIResource):
935
935
  # Wait for all PDF processing jobs to complete
936
936
  self._client.jobs.wait_for_jobs(job_ids=job_ids, title=f"Parsing {table_name} from PDFs", node_id=node_id)
937
937
 
938
- # Collect results from all processed PDFs
938
+ # Collect results from all processed PDFs - each result is tagged with its source row_idx
939
939
  structured_results: list[dict[str, Any]] = []
940
940
 
941
941
  def collect_pdf_results(row_idx: int, dataset_name: str) -> List[Dict[str, Any]]:
942
942
  pdf_path = batch_rows[row_idx][path_column]
943
943
  entities_result = self._client.datasets.view_table(dataset=dataset_name, name=table_name)
944
- return [{**entity.properties, path_column: pdf_path} for entity in entities_result]
944
+ return [
945
+ {**entity.properties, path_column: pdf_path, "__row_idx__": row_idx} for entity in entities_result
946
+ ]
945
947
 
946
948
  with ThreadPoolExecutor(max_workers=MAX_PARALLEL_REQUESTS) as executor:
947
949
  collect_futures = [
948
950
  executor.submit(collect_pdf_results, row_idx, dataset_name)
949
951
  for row_idx, dataset_name in idx_to_dataset.items()
950
952
  ]
951
- for future in tqdm(as_completed(collect_futures), total=len(collect_futures), desc="Collecting PDF extractions"):
953
+ for future in tqdm(
954
+ as_completed(collect_futures), total=len(collect_futures), desc="Collecting PDF extractions"
955
+ ):
952
956
  results = future.result()
953
957
  structured_results.extend(results)
954
958
 
@@ -958,17 +962,12 @@ class PolarsResource(SyncAPIResource):
958
962
  if col_name not in result_row:
959
963
  result_row[col_name] = None
960
964
 
961
- # Create DataFrame with structured results
962
965
  if not structured_results:
963
- structured_df = pl.DataFrame(
964
- {col: pl.Series([], dtype=polars_schema[col]) for col in polars_schema.names()}
965
- )
966
- else:
967
- structured_df = pl.DataFrame(structured_results, schema=polars_schema)
966
+ return pl.DataFrame(schema=polars_schema)
968
967
 
969
- # Join with original batch to preserve any additional columns
970
- joined_df = batch_df.join(structured_df, on=path_column, how="left")
971
- return joined_df
968
+ # Build result dataframe directly from structured_results without joining
969
+ # Each entity is already tagged with path_column from its source PDF
970
+ return pl.DataFrame(structured_results, schema=polars_schema)
972
971
 
973
972
  return document_paths.map_batches(structure_batch, schema=polars_schema, no_optimizations=True)
974
973
 
@@ -44,6 +44,7 @@ class Config(TypedDict, total=False):
44
44
  "bedrock.claude-sonnet-4-bedrock",
45
45
  "bedrock.claude-sonnet-4-5-bedrock",
46
46
  "bedrock.claude-opus-4-5-bedrock",
47
+ "bedrock.claude-haiku-4-5-bedrock",
47
48
  "gemini.gemini-2.5-pro",
48
49
  "gemini.gemini-2.5-flash",
49
50
  "gemini.gemini-3-pro-preview",
@@ -48,6 +48,7 @@ class Config(TypedDict, total=False):
48
48
  "bedrock.claude-sonnet-4-bedrock",
49
49
  "bedrock.claude-sonnet-4-5-bedrock",
50
50
  "bedrock.claude-opus-4-5-bedrock",
51
+ "bedrock.claude-haiku-4-5-bedrock",
51
52
  "gemini.gemini-2.5-pro",
52
53
  "gemini.gemini-2.5-flash",
53
54
  "gemini.gemini-3-pro-preview",
@@ -5,10 +5,18 @@ from typing import List
5
5
  from .._models import BaseModel
6
6
  from .connector_catalog_with_methods import ConnectorCatalogWithMethods
7
7
 
8
- __all__ = ["ConnectorCatalogListResponse"]
8
+ __all__ = ["ConnectorCatalogListResponse", "CategoryCount"]
9
+
10
+
11
+ class CategoryCount(BaseModel):
12
+ category: str
13
+
14
+ count: int
9
15
 
10
16
 
11
17
  class ConnectorCatalogListResponse(BaseModel):
18
+ category_counts: List[CategoryCount]
19
+
12
20
  items: List[ConnectorCatalogWithMethods]
13
21
 
14
22
  total_count: int
@@ -18,12 +18,14 @@ __all__ = [
18
18
  "DerivedProperty",
19
19
  "Failed",
20
20
  "Completed",
21
+ "CacheHit",
21
22
  "AttemptedMatch",
22
23
  "DatahubPageFetched",
23
24
  "DatahubDatabasesCreated",
24
25
  "DatahubSchemasCreated",
25
26
  "DatahubTablesProcessed",
26
27
  "DatahubEmbeddingBatch",
28
+ "ViewedPdfPage",
27
29
  ]
28
30
 
29
31
 
@@ -106,6 +108,14 @@ class Completed(BaseModel):
106
108
  message: Optional[str] = None
107
109
 
108
110
 
111
+ class CacheHit(BaseModel):
112
+ cached_from_job_id: str
113
+
114
+ event_type: Literal["cache_hit"]
115
+
116
+ message: Optional[str] = None
117
+
118
+
109
119
  class AttemptedMatch(BaseModel):
110
120
  candidates: List[Dict[str, Union[str, bool, float]]]
111
121
 
@@ -117,6 +127,8 @@ class AttemptedMatch(BaseModel):
117
127
 
118
128
  match_idx: Optional[int] = None
119
129
 
130
+ raw_text: Optional[str] = None
131
+
120
132
 
121
133
  class DatahubPageFetched(BaseModel):
122
134
  datasets_in_page: int
@@ -164,6 +176,12 @@ class DatahubEmbeddingBatch(BaseModel):
164
176
  total_batches: int
165
177
 
166
178
 
179
+ class ViewedPdfPage(BaseModel):
180
+ event_type: Literal["viewed_pdf_page"]
181
+
182
+ page_index: int
183
+
184
+
167
185
  JobEventBody: TypeAlias = Annotated[
168
186
  Union[
169
187
  AgentNavigated,
@@ -175,12 +193,14 @@ JobEventBody: TypeAlias = Annotated[
175
193
  DerivedProperty,
176
194
  Failed,
177
195
  Completed,
196
+ CacheHit,
178
197
  AttemptedMatch,
179
198
  DatahubPageFetched,
180
199
  DatahubDatabasesCreated,
181
200
  DatahubSchemasCreated,
182
201
  DatahubTablesProcessed,
183
202
  DatahubEmbeddingBatch,
203
+ ViewedPdfPage,
184
204
  ],
185
205
  PropertyInfo(discriminator="event_type"),
186
206
  ]
@@ -1,10 +1,11 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
- from typing import List, Union, Optional
3
+ from typing import Dict, List, Union, Optional
4
4
  from typing_extensions import Literal, Annotated, TypeAlias
5
5
 
6
6
  from .._utils import PropertyInfo
7
7
  from .._models import BaseModel
8
+ from .knowledge_graph import KnowledgeGraph
8
9
 
9
10
  __all__ = [
10
11
  "ToolInvocation",
@@ -12,6 +13,21 @@ __all__ = [
12
13
  "WebSearchInput",
13
14
  "WebNavigate",
14
15
  "WebNavigateInput",
16
+ "ViewPage",
17
+ "ViewPageInput",
18
+ "Save",
19
+ "SaveInput",
20
+ "SaveEntities",
21
+ "SaveEntitiesInput",
22
+ "Exit",
23
+ "ExitInput",
24
+ "APIExecute",
25
+ "APIExecuteInput",
26
+ "Javascript",
27
+ "JavascriptInput",
28
+ "NavigateToIFrame",
29
+ "NavigateToIFrameInput",
30
+ "InfiniteScroll",
15
31
  "InspectStep",
16
32
  "InspectStepInput",
17
33
  "ReadNodeLogs",
@@ -66,6 +82,8 @@ class WebSearch(BaseModel):
66
82
  class WebNavigateInput(BaseModel):
67
83
  url: str
68
84
 
85
+ output_format: Optional[Literal["Text", "Visual"]] = None
86
+
69
87
 
70
88
  class WebNavigate(BaseModel):
71
89
  input: WebNavigateInput
@@ -73,6 +91,95 @@ class WebNavigate(BaseModel):
73
91
  name: Literal["WebNavigate"]
74
92
 
75
93
 
94
+ class ViewPageInput(BaseModel):
95
+ page_number: int
96
+
97
+
98
+ class ViewPage(BaseModel):
99
+ input: ViewPageInput
100
+
101
+ name: Literal["ViewPage"]
102
+
103
+
104
+ class SaveInput(BaseModel):
105
+ knowledge_graph: KnowledgeGraph
106
+ """
107
+ Knowledge graph info structured to deserialize and display in the same format
108
+ that the LLM outputs. Also the first representation of an LLM output in the
109
+ pipeline from raw tool output to being merged into a DB
110
+ """
111
+
112
+ reason: str
113
+
114
+ sources: List[str]
115
+
116
+
117
+ class Save(BaseModel):
118
+ input: SaveInput
119
+
120
+ name: Literal["Save"]
121
+
122
+
123
+ class SaveEntitiesInput(BaseModel):
124
+ entities: List[Dict[str, Dict[str, object]]]
125
+
126
+ reason: str
127
+
128
+ sources: List[str]
129
+
130
+
131
+ class SaveEntities(BaseModel):
132
+ input: SaveEntitiesInput
133
+
134
+ name: Literal["SaveEntities"]
135
+
136
+
137
+ class ExitInput(BaseModel):
138
+ reason: str
139
+
140
+
141
+ class Exit(BaseModel):
142
+ input: ExitInput
143
+
144
+ name: Literal["Exit"]
145
+
146
+
147
+ class APIExecuteInput(BaseModel):
148
+ code: str
149
+
150
+
151
+ class APIExecute(BaseModel):
152
+ input: APIExecuteInput
153
+
154
+ name: Literal["ApiExecute"]
155
+
156
+
157
+ class JavascriptInput(BaseModel):
158
+ code: str
159
+
160
+
161
+ class Javascript(BaseModel):
162
+ input: JavascriptInput
163
+
164
+ name: Literal["Javascript"]
165
+
166
+
167
+ class NavigateToIFrameInput(BaseModel):
168
+ index: int
169
+
170
+
171
+ class NavigateToIFrame(BaseModel):
172
+ input: NavigateToIFrameInput
173
+
174
+ name: Literal["NavigateToIFrame"]
175
+
176
+
177
+ class InfiniteScroll(BaseModel):
178
+ input: object
179
+
180
+ name: Literal["InfiniteScroll"]
181
+
182
+
76
183
  class InspectStepInput(BaseModel):
77
184
  step_name: str
78
185
 
@@ -329,6 +436,14 @@ ToolInvocation: TypeAlias = Annotated[
329
436
  Union[
330
437
  WebSearch,
331
438
  WebNavigate,
439
+ ViewPage,
440
+ Save,
441
+ SaveEntities,
442
+ Exit,
443
+ APIExecute,
444
+ Javascript,
445
+ NavigateToIFrame,
446
+ InfiniteScroll,
332
447
  InspectStep,
333
448
  ReadNodeLogs,
334
449
  DeleteFile,
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: structifyai
3
- Version: 1.172.0
3
+ Version: 1.174.0
4
4
  Summary: The official Python library for the structify API
5
5
  Project-URL: Homepage, https://github.com/StructifyAI/structify-python
6
6
  Project-URL: Repository, https://github.com/StructifyAI/structify-python
@@ -11,7 +11,7 @@ structify/_resource.py,sha256=tJi4pDQooQZ_zJwEwrLj-U-ye2hC-cbmr1GzIwCT10Y,1118
11
11
  structify/_response.py,sha256=RuNhMDiZUdPqEbmFJHDVI4FMPDszk8QjK9LVWm1Fagk,28806
12
12
  structify/_streaming.py,sha256=n4C9M7ITmANYn9LaWHNoqJdIIyF7svLco2qst7u3M7U,10233
13
13
  structify/_types.py,sha256=jj4p-m3vpUma0AdhPWIaljHZXeb4RKnrAusjVdpDy5Y,7597
14
- structify/_version.py,sha256=Rx9zou1rDU7TWT-wTjB0rgLPSL1e7FlGBViJXZzQX60,163
14
+ structify/_version.py,sha256=jAkquXwE8e7XiNAPLSl0tNkX5WyP7kZ2giJmBdnOmlM,163
15
15
  structify/pagination.py,sha256=ycybhWcpKk4ztsMcCA6C0WZiJejGrSx6bSr8LLskJUY,4346
16
16
  structify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  structify/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
@@ -39,7 +39,7 @@ structify/resources/external_dataframe_proxy.py,sha256=DSn0YwWIembR__ZtDxVCJtyY3
39
39
  structify/resources/jobs.py,sha256=gO1aSByi1dMvW90UDsMmNhLHFCOY4ENLkZcAx4gbLHY,30108
40
40
  structify/resources/match.py,sha256=gDWEWnKwEoLbheQAMFltJCk2ysa_L9AuJMOaauM7c4Y,12248
41
41
  structify/resources/nango.py,sha256=Zl0M1XhlVe61jHVd-SdYI9uEbEhIRmskvlk7Xp0Lh8g,9166
42
- structify/resources/polars.py,sha256=nE-I3kX552BOjcyn4ZJFlAY2-_Y0LM0Pta1653-uYCg,61613
42
+ structify/resources/polars.py,sha256=--kVVUoJ7AvGirnc462r0TRsK7lcXXDQD0XWhqtG3K8,61552
43
43
  structify/resources/projects.py,sha256=YDikBDB9D1EXyZ2GyRx4GlpQ83snw51YlNuU1sLHqho,14117
44
44
  structify/resources/public_sessions.py,sha256=_JmssE0MMjeGdxT0FWtrkcceSV4skeEkVGYeO2FkJes,9976
45
45
  structify/resources/sandbox.py,sha256=Gc7uxZAOmbXA477UHvS244BokI1Tte_34xxIj0dC5PA,16984
@@ -90,7 +90,7 @@ structify/types/chat_admin_issue_found_params.py,sha256=N5YMgm6O-Yr3sXZb2sWDHdyN
90
90
  structify/types/chat_copy_node_output_by_code_hash_params.py,sha256=gP4V2Y0_Rb4-kwoEceClJLgjVdWv7bgcI4qqQIvdNv8,356
91
91
  structify/types/chat_copy_node_output_by_code_hash_response.py,sha256=Cst1RK9jDrMAKEyzLIk5_-D6YUbO-AIVllYui2sy2aQ,272
92
92
  structify/types/chat_copy_params.py,sha256=tfA3jdduKDJeHiYjWWKdHifRMp1sCGyGQcYHFuMFdnE,425
93
- structify/types/chat_create_session_params.py,sha256=_ugvArrIDJDP1zT8o_mFCEGkZOwv67xl5nsMPIrMvQE,1762
93
+ structify/types/chat_create_session_params.py,sha256=QSkL1Z0Vj0sT8AJ8mcDZQIqAwUkbhiQHuF6811veMmc,1810
94
94
  structify/types/chat_delete_files_params.py,sha256=Rv24bWe1CK524xobl9-_APx5GG8KTqIwEtUgYIUlMXc,343
95
95
  structify/types/chat_delete_files_response.py,sha256=ZoFJjfZqn_rVuwiFhsuFigN_AQHh_DRkb5KRj7J_49g,225
96
96
  structify/types/chat_dependency.py,sha256=J8JLY6kBjFt4dgf-_Vk_HEjxS5R6_6VnTQue2JzZRug,415
@@ -114,13 +114,13 @@ structify/types/chat_update_session_favorite_params.py,sha256=UNi0YujSs1gCSry2jK
114
114
  structify/types/chat_update_session_params.py,sha256=5NQZx-j-W2xQGlX0GU-9vwyQt0QunlifWY2MpRtrEv8,382
115
115
  structify/types/chat_update_visibility_params.py,sha256=uWtgq4zb_TSO4hIsY0FmdWaljB_20wpjl6IQhRMibTg,361
116
116
  structify/types/chat_visibility.py,sha256=GpTgjpcupE1z9Iul8fxOrlMoP_2wjQ_7Z0rJ2y9qO5Q,244
117
- structify/types/code_generate_code_params.py,sha256=NzDMzMT7IpzrL5bHhC4m3k0B7R5N5WUbiyCmTudM2oE,2066
117
+ structify/types/code_generate_code_params.py,sha256=cX5HYhmrv-9gBZcYMy9jJ8h4vMewt-Zy5jVfAYJsirA,2114
118
118
  structify/types/code_interrupt_generation_params.py,sha256=1Y9VOgObIJFyYgAEkUuWZRKKV5-4HcoRA6p5iSEnF3s,410
119
119
  structify/types/connector.py,sha256=hqfk8x1ZM39idvAd4wXLm1QNrnT3kRgxEuhk8O28-B0,1069
120
120
  structify/types/connector_auth_method.py,sha256=iHBmcNbi74mDjFd_m4-HrGrZoV9_WRSFtrOY0fz9NhQ,562
121
121
  structify/types/connector_auth_method_with_fields.py,sha256=EABCugmJ8ahZNhSqvNQAESjHpD3kozh4GPop2OZpSMw,519
122
122
  structify/types/connector_catalog_list_params.py,sha256=vqyYfA7M3STeDKKshpD9FthkmoSh4LJecrBFhVGbBJs,470
123
- structify/types/connector_catalog_list_response.py,sha256=Mr9KRSF1oi2HauT_Hvkb8CYjdMFIG3Zc2lb5U8itOhE,376
123
+ structify/types/connector_catalog_list_response.py,sha256=0e1AQU59zTfN-bvx1GCUElnaEiyW104XHUIYvZ4Gkmg,503
124
124
  structify/types/connector_catalog_with_methods.py,sha256=EZdXqN6oZggX-SYXUrn_hDgCRHeMY1zNo7pY1q0vX4E,532
125
125
  structify/types/connector_category.py,sha256=2YnDoj4lXtyfRTVhSi-7a5InFuOUn26SqeVeaCSS4lE,245
126
126
  structify/types/connector_create_params.py,sha256=Xj2MWkiWlMhgCc-Oo3udVGlXwSjU7jy8f0WpKgRFbj8,961
@@ -259,7 +259,7 @@ structify/types/granularity.py,sha256=At6biWApGE7uE8jr5KnHP9Jr1yPFkuqLwXjExaswtB
259
259
  structify/types/image.py,sha256=FpYU3gDZnet0wO17e2uHzcyRUD6E1ssSgv63Ew0DzjU,269
260
260
  structify/types/invitation_details_response.py,sha256=TmyeM4mW4Kb6L0d7Ook9cH3g8vzfQYPnvZDIBdgVAO4,272
261
261
  structify/types/job_cancel_response.py,sha256=y8M8qPkcXT-pTi4IwQ0JBJQzXeAQIs3u2OsaVeGBTtc,1224
262
- structify/types/job_event_body.py,sha256=CUZt8TK6d46qf4KYBxH0mFYaiyoDJT4pGAUZMhGrwC4,3564
262
+ structify/types/job_event_body.py,sha256=pO11fTm5sGvQ6cx7NvWTitUaunSqdRo-d5tkyh4KPsk,3906
263
263
  structify/types/job_get_scrapers_response.py,sha256=-E9DaG9YCkrJeuffZ3RiI6MW7ZsbPjmlVU3jUQdggSY,694
264
264
  structify/types/job_get_source_entities_response.py,sha256=vlGKFkMKjII48EexIEBbls6-PAVYDDkmR1__aKjuiUo,3379
265
265
  structify/types/job_list_params.py,sha256=i_MYi4vIFC6Dq5I4E4htYwZejqRE5qAf2lgg_SRBuKg,1012
@@ -367,7 +367,7 @@ structify/types/team_wiki_page.py,sha256=PDcFqG8mc10Ejnh6YeoSdalNorn40axi7K3ppmh
367
367
  structify/types/team_with_role.py,sha256=t-UgnD90UmxlMpLHdxN8L7kK7VxMVLj-PM26NvkV_2U,327
368
368
  structify/types/teams_link_code_response.py,sha256=p81nvCP4KeGVUgQiiaFwAlLMRaGnG4Iv7rp03WFG0uA,269
369
369
  structify/types/token_response.py,sha256=N75RMh9ZF05z5Ubc_Pu2pePe3MN7Yqn2GB1FJ8FQJZ0,408
370
- structify/types/tool_invocation.py,sha256=5D_zxMZmI3omAvGHTJSmCeuZMgMQXhbtomWh0f2Ll-k,5987
370
+ structify/types/tool_invocation.py,sha256=HCc_nFmfjm5MnoDKDbtnh7TIvLEH7gvsBI4IIU0AxO4,7995
371
371
  structify/types/tool_metadata.py,sha256=drtuS9ZQm4cB2KScB1pycX1ifjhGwSUMJm9TFv1SHuI,382
372
372
  structify/types/tool_result.py,sha256=C7CSxU0PfW4WMiWlwDckEmsjTbZmWzEoSt2LbcYdAtc,2013
373
373
  structify/types/update_member_role_response.py,sha256=q1jTM0lFmzvH8ki7GcSQAh-5cN7VnLF6Jg_IRdA4AqE,222
@@ -489,7 +489,7 @@ structify/types/user/stripe_create_portal_session_params.py,sha256=5AYRC8z_SlKmd
489
489
  structify/types/user/stripe_create_session_params.py,sha256=DFcNLNzEWeupkGQ9J5PafsuL_bIU9cLEIhAmFPsRlfo,387
490
490
  structify/types/user/stripe_create_subscription_params.py,sha256=d8HfiC94gJbG-cC_WvBz6xYCvxKJO_EP2yyVmVvufrU,424
491
491
  structify/types/user/subscription_plan.py,sha256=qKJMM-zPpYolYC1DlypOwPpxlyJBLkQqFK_0VpwktJs,222
492
- structifyai-1.172.0.dist-info/METADATA,sha256=wA_nnCaPjieihaBzi0fUawgTBLp4syLr_bLKFFZKz9o,16399
493
- structifyai-1.172.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
494
- structifyai-1.172.0.dist-info/licenses/LICENSE,sha256=9CwgrmGz3rZSTT-KqGc1gua-7g8B4ThTgMtUgPALh5c,11339
495
- structifyai-1.172.0.dist-info/RECORD,,
492
+ structifyai-1.174.0.dist-info/METADATA,sha256=_cBl4jOw0K_TtGioyYQoTH0ezXr8x4NPfipk2wWlzVk,16399
493
+ structifyai-1.174.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
494
+ structifyai-1.174.0.dist-info/licenses/LICENSE,sha256=9CwgrmGz3rZSTT-KqGc1gua-7g8B4ThTgMtUgPALh5c,11339
495
+ structifyai-1.174.0.dist-info/RECORD,,