structifyai 1.172.0__py3-none-any.whl → 1.173.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
structify/_version.py CHANGED
@@ -1,4 +1,4 @@
1
1
  # File generated from our OpenAPI spec by Stainless. See CONTRIBUTING.md for details.
2
2
 
3
3
  __title__ = "structify"
4
- __version__ = "1.172.0" # x-release-please-version
4
+ __version__ = "1.173.0" # x-release-please-version
@@ -935,20 +935,24 @@ class PolarsResource(SyncAPIResource):
935
935
  # Wait for all PDF processing jobs to complete
936
936
  self._client.jobs.wait_for_jobs(job_ids=job_ids, title=f"Parsing {table_name} from PDFs", node_id=node_id)
937
937
 
938
- # Collect results from all processed PDFs
938
+ # Collect results from all processed PDFs - each result is tagged with its source row_idx
939
939
  structured_results: list[dict[str, Any]] = []
940
940
 
941
941
  def collect_pdf_results(row_idx: int, dataset_name: str) -> List[Dict[str, Any]]:
942
942
  pdf_path = batch_rows[row_idx][path_column]
943
943
  entities_result = self._client.datasets.view_table(dataset=dataset_name, name=table_name)
944
- return [{**entity.properties, path_column: pdf_path} for entity in entities_result]
944
+ return [
945
+ {**entity.properties, path_column: pdf_path, "__row_idx__": row_idx} for entity in entities_result
946
+ ]
945
947
 
946
948
  with ThreadPoolExecutor(max_workers=MAX_PARALLEL_REQUESTS) as executor:
947
949
  collect_futures = [
948
950
  executor.submit(collect_pdf_results, row_idx, dataset_name)
949
951
  for row_idx, dataset_name in idx_to_dataset.items()
950
952
  ]
951
- for future in tqdm(as_completed(collect_futures), total=len(collect_futures), desc="Collecting PDF extractions"):
953
+ for future in tqdm(
954
+ as_completed(collect_futures), total=len(collect_futures), desc="Collecting PDF extractions"
955
+ ):
952
956
  results = future.result()
953
957
  structured_results.extend(results)
954
958
 
@@ -958,17 +962,12 @@ class PolarsResource(SyncAPIResource):
958
962
  if col_name not in result_row:
959
963
  result_row[col_name] = None
960
964
 
961
- # Create DataFrame with structured results
962
965
  if not structured_results:
963
- structured_df = pl.DataFrame(
964
- {col: pl.Series([], dtype=polars_schema[col]) for col in polars_schema.names()}
965
- )
966
- else:
967
- structured_df = pl.DataFrame(structured_results, schema=polars_schema)
966
+ return pl.DataFrame(schema=polars_schema)
968
967
 
969
- # Join with original batch to preserve any additional columns
970
- joined_df = batch_df.join(structured_df, on=path_column, how="left")
971
- return joined_df
968
+ # Build result dataframe directly from structured_results without joining
969
+ # Each entity is already tagged with path_column from its source PDF
970
+ return pl.DataFrame(structured_results, schema=polars_schema)
972
971
 
973
972
  return document_paths.map_batches(structure_batch, schema=polars_schema, no_optimizations=True)
974
973
 
@@ -44,6 +44,7 @@ class Config(TypedDict, total=False):
44
44
  "bedrock.claude-sonnet-4-bedrock",
45
45
  "bedrock.claude-sonnet-4-5-bedrock",
46
46
  "bedrock.claude-opus-4-5-bedrock",
47
+ "bedrock.claude-haiku-4-5-bedrock",
47
48
  "gemini.gemini-2.5-pro",
48
49
  "gemini.gemini-2.5-flash",
49
50
  "gemini.gemini-3-pro-preview",
@@ -48,6 +48,7 @@ class Config(TypedDict, total=False):
48
48
  "bedrock.claude-sonnet-4-bedrock",
49
49
  "bedrock.claude-sonnet-4-5-bedrock",
50
50
  "bedrock.claude-opus-4-5-bedrock",
51
+ "bedrock.claude-haiku-4-5-bedrock",
51
52
  "gemini.gemini-2.5-pro",
52
53
  "gemini.gemini-2.5-flash",
53
54
  "gemini.gemini-3-pro-preview",
@@ -24,6 +24,7 @@ __all__ = [
24
24
  "DatahubSchemasCreated",
25
25
  "DatahubTablesProcessed",
26
26
  "DatahubEmbeddingBatch",
27
+ "ViewedPdfPage",
27
28
  ]
28
29
 
29
30
 
@@ -164,6 +165,12 @@ class DatahubEmbeddingBatch(BaseModel):
164
165
  total_batches: int
165
166
 
166
167
 
168
+ class ViewedPdfPage(BaseModel):
169
+ event_type: Literal["viewed_pdf_page"]
170
+
171
+ page_index: int
172
+
173
+
167
174
  JobEventBody: TypeAlias = Annotated[
168
175
  Union[
169
176
  AgentNavigated,
@@ -181,6 +188,7 @@ JobEventBody: TypeAlias = Annotated[
181
188
  DatahubSchemasCreated,
182
189
  DatahubTablesProcessed,
183
190
  DatahubEmbeddingBatch,
191
+ ViewedPdfPage,
184
192
  ],
185
193
  PropertyInfo(discriminator="event_type"),
186
194
  ]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: structifyai
3
- Version: 1.172.0
3
+ Version: 1.173.0
4
4
  Summary: The official Python library for the structify API
5
5
  Project-URL: Homepage, https://github.com/StructifyAI/structify-python
6
6
  Project-URL: Repository, https://github.com/StructifyAI/structify-python
@@ -11,7 +11,7 @@ structify/_resource.py,sha256=tJi4pDQooQZ_zJwEwrLj-U-ye2hC-cbmr1GzIwCT10Y,1118
11
11
  structify/_response.py,sha256=RuNhMDiZUdPqEbmFJHDVI4FMPDszk8QjK9LVWm1Fagk,28806
12
12
  structify/_streaming.py,sha256=n4C9M7ITmANYn9LaWHNoqJdIIyF7svLco2qst7u3M7U,10233
13
13
  structify/_types.py,sha256=jj4p-m3vpUma0AdhPWIaljHZXeb4RKnrAusjVdpDy5Y,7597
14
- structify/_version.py,sha256=Rx9zou1rDU7TWT-wTjB0rgLPSL1e7FlGBViJXZzQX60,163
14
+ structify/_version.py,sha256=WcTGZuYh0GCneqsaePXe_535r8UvSvOWAg183seZe-g,163
15
15
  structify/pagination.py,sha256=ycybhWcpKk4ztsMcCA6C0WZiJejGrSx6bSr8LLskJUY,4346
16
16
  structify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
17
17
  structify/_utils/__init__.py,sha256=7fch0GT9zpNnErbciSpUNa-SjTxxjY6kxHxKMOM4AGs,2305
@@ -39,7 +39,7 @@ structify/resources/external_dataframe_proxy.py,sha256=DSn0YwWIembR__ZtDxVCJtyY3
39
39
  structify/resources/jobs.py,sha256=gO1aSByi1dMvW90UDsMmNhLHFCOY4ENLkZcAx4gbLHY,30108
40
40
  structify/resources/match.py,sha256=gDWEWnKwEoLbheQAMFltJCk2ysa_L9AuJMOaauM7c4Y,12248
41
41
  structify/resources/nango.py,sha256=Zl0M1XhlVe61jHVd-SdYI9uEbEhIRmskvlk7Xp0Lh8g,9166
42
- structify/resources/polars.py,sha256=nE-I3kX552BOjcyn4ZJFlAY2-_Y0LM0Pta1653-uYCg,61613
42
+ structify/resources/polars.py,sha256=--kVVUoJ7AvGirnc462r0TRsK7lcXXDQD0XWhqtG3K8,61552
43
43
  structify/resources/projects.py,sha256=YDikBDB9D1EXyZ2GyRx4GlpQ83snw51YlNuU1sLHqho,14117
44
44
  structify/resources/public_sessions.py,sha256=_JmssE0MMjeGdxT0FWtrkcceSV4skeEkVGYeO2FkJes,9976
45
45
  structify/resources/sandbox.py,sha256=Gc7uxZAOmbXA477UHvS244BokI1Tte_34xxIj0dC5PA,16984
@@ -90,7 +90,7 @@ structify/types/chat_admin_issue_found_params.py,sha256=N5YMgm6O-Yr3sXZb2sWDHdyN
90
90
  structify/types/chat_copy_node_output_by_code_hash_params.py,sha256=gP4V2Y0_Rb4-kwoEceClJLgjVdWv7bgcI4qqQIvdNv8,356
91
91
  structify/types/chat_copy_node_output_by_code_hash_response.py,sha256=Cst1RK9jDrMAKEyzLIk5_-D6YUbO-AIVllYui2sy2aQ,272
92
92
  structify/types/chat_copy_params.py,sha256=tfA3jdduKDJeHiYjWWKdHifRMp1sCGyGQcYHFuMFdnE,425
93
- structify/types/chat_create_session_params.py,sha256=_ugvArrIDJDP1zT8o_mFCEGkZOwv67xl5nsMPIrMvQE,1762
93
+ structify/types/chat_create_session_params.py,sha256=QSkL1Z0Vj0sT8AJ8mcDZQIqAwUkbhiQHuF6811veMmc,1810
94
94
  structify/types/chat_delete_files_params.py,sha256=Rv24bWe1CK524xobl9-_APx5GG8KTqIwEtUgYIUlMXc,343
95
95
  structify/types/chat_delete_files_response.py,sha256=ZoFJjfZqn_rVuwiFhsuFigN_AQHh_DRkb5KRj7J_49g,225
96
96
  structify/types/chat_dependency.py,sha256=J8JLY6kBjFt4dgf-_Vk_HEjxS5R6_6VnTQue2JzZRug,415
@@ -114,7 +114,7 @@ structify/types/chat_update_session_favorite_params.py,sha256=UNi0YujSs1gCSry2jK
114
114
  structify/types/chat_update_session_params.py,sha256=5NQZx-j-W2xQGlX0GU-9vwyQt0QunlifWY2MpRtrEv8,382
115
115
  structify/types/chat_update_visibility_params.py,sha256=uWtgq4zb_TSO4hIsY0FmdWaljB_20wpjl6IQhRMibTg,361
116
116
  structify/types/chat_visibility.py,sha256=GpTgjpcupE1z9Iul8fxOrlMoP_2wjQ_7Z0rJ2y9qO5Q,244
117
- structify/types/code_generate_code_params.py,sha256=NzDMzMT7IpzrL5bHhC4m3k0B7R5N5WUbiyCmTudM2oE,2066
117
+ structify/types/code_generate_code_params.py,sha256=cX5HYhmrv-9gBZcYMy9jJ8h4vMewt-Zy5jVfAYJsirA,2114
118
118
  structify/types/code_interrupt_generation_params.py,sha256=1Y9VOgObIJFyYgAEkUuWZRKKV5-4HcoRA6p5iSEnF3s,410
119
119
  structify/types/connector.py,sha256=hqfk8x1ZM39idvAd4wXLm1QNrnT3kRgxEuhk8O28-B0,1069
120
120
  structify/types/connector_auth_method.py,sha256=iHBmcNbi74mDjFd_m4-HrGrZoV9_WRSFtrOY0fz9NhQ,562
@@ -259,7 +259,7 @@ structify/types/granularity.py,sha256=At6biWApGE7uE8jr5KnHP9Jr1yPFkuqLwXjExaswtB
259
259
  structify/types/image.py,sha256=FpYU3gDZnet0wO17e2uHzcyRUD6E1ssSgv63Ew0DzjU,269
260
260
  structify/types/invitation_details_response.py,sha256=TmyeM4mW4Kb6L0d7Ook9cH3g8vzfQYPnvZDIBdgVAO4,272
261
261
  structify/types/job_cancel_response.py,sha256=y8M8qPkcXT-pTi4IwQ0JBJQzXeAQIs3u2OsaVeGBTtc,1224
262
- structify/types/job_event_body.py,sha256=CUZt8TK6d46qf4KYBxH0mFYaiyoDJT4pGAUZMhGrwC4,3564
262
+ structify/types/job_event_body.py,sha256=C3RfyefsBgWrFR_KAK7-Tzmlj6B3f4JVR6iR0HYt1Ks,3706
263
263
  structify/types/job_get_scrapers_response.py,sha256=-E9DaG9YCkrJeuffZ3RiI6MW7ZsbPjmlVU3jUQdggSY,694
264
264
  structify/types/job_get_source_entities_response.py,sha256=vlGKFkMKjII48EexIEBbls6-PAVYDDkmR1__aKjuiUo,3379
265
265
  structify/types/job_list_params.py,sha256=i_MYi4vIFC6Dq5I4E4htYwZejqRE5qAf2lgg_SRBuKg,1012
@@ -489,7 +489,7 @@ structify/types/user/stripe_create_portal_session_params.py,sha256=5AYRC8z_SlKmd
489
489
  structify/types/user/stripe_create_session_params.py,sha256=DFcNLNzEWeupkGQ9J5PafsuL_bIU9cLEIhAmFPsRlfo,387
490
490
  structify/types/user/stripe_create_subscription_params.py,sha256=d8HfiC94gJbG-cC_WvBz6xYCvxKJO_EP2yyVmVvufrU,424
491
491
  structify/types/user/subscription_plan.py,sha256=qKJMM-zPpYolYC1DlypOwPpxlyJBLkQqFK_0VpwktJs,222
492
- structifyai-1.172.0.dist-info/METADATA,sha256=wA_nnCaPjieihaBzi0fUawgTBLp4syLr_bLKFFZKz9o,16399
493
- structifyai-1.172.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
494
- structifyai-1.172.0.dist-info/licenses/LICENSE,sha256=9CwgrmGz3rZSTT-KqGc1gua-7g8B4ThTgMtUgPALh5c,11339
495
- structifyai-1.172.0.dist-info/RECORD,,
492
+ structifyai-1.173.0.dist-info/METADATA,sha256=YYeHg00958d2UCUx9tOQB5WRAOIqsU3WZ-Ps3vxzKcU,16399
493
+ structifyai-1.173.0.dist-info/WHEEL,sha256=C2FUgwZgiLbznR-k0b_5k3Ai_1aASOXDss3lzCUsUug,87
494
+ structifyai-1.173.0.dist-info/licenses/LICENSE,sha256=9CwgrmGz3rZSTT-KqGc1gua-7g8B4ThTgMtUgPALh5c,11339
495
+ structifyai-1.173.0.dist-info/RECORD,,