structifyai 1.178.0__py3-none-any.whl → 1.180.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- structify/_version.py +1 -1
- structify/resources/connectors/connectors.py +357 -1
- structify/resources/polars.py +55 -42
- structify/resources/slack.py +8 -8
- structify/resources/wiki.py +23 -18
- structify/types/__init__.py +4 -1
- structify/types/chat_create_session_params.py +1 -0
- structify/types/code_generate_code_params.py +1 -0
- structify/types/connector_add_schema_object_params.py +59 -0
- structify/types/connector_add_schema_object_response.py +35 -0
- structify/types/llm_information_store.py +4 -0
- structify/types/slack_event_payload_param.py +2 -2
- structify/types/slack_events_params.py +2 -2
- structify/types/wiki_create_params.py +1 -2
- structify/types/wiki_create_response.py +23 -0
- structify/types/wiki_list_response.py +22 -3
- structify/types/wiki_page_with_references.py +18 -2
- structify/types/wiki_update_params.py +4 -2
- structify/types/wiki_update_response.py +23 -0
- {structifyai-1.178.0.dist-info → structifyai-1.180.0.dist-info}/METADATA +1 -1
- {structifyai-1.178.0.dist-info → structifyai-1.180.0.dist-info}/RECORD +23 -20
- structify/types/team_wiki_page.py +0 -28
- {structifyai-1.178.0.dist-info → structifyai-1.180.0.dist-info}/WHEEL +0 -0
- {structifyai-1.178.0.dist-info → structifyai-1.180.0.dist-info}/licenses/LICENSE +0 -0
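structify/_version.py
CHANGED
[the +1 -1 hunk for this file is missing from this extract]
structify/resources/connectors/connectors.py
CHANGED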
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
-
from typing import Dict, Optional
|
|
5
|
+
from typing import Any, Dict, Optional, cast
|
|
6
6
|
from typing_extensions import Literal, overload
|
|
7
7
|
|
|
8
8
|
import httpx
|
|
@@ -17,6 +17,7 @@ from ...types import (
|
|
|
17
17
|
connector_create_secret_params,
|
|
18
18
|
connector_search_tables_params,
|
|
19
19
|
connector_update_column_params,
|
|
20
|
+
connector_add_schema_object_params,
|
|
20
21
|
connector_get_explorer_chat_params,
|
|
21
22
|
connector_list_with_snippets_params,
|
|
22
23
|
connector_delete_schema_object_params,
|
|
@@ -53,6 +54,7 @@ from ...types.exploration_runs_response import ExplorationRunsResponse
|
|
|
53
54
|
from ...types.connector_summaries_response import ConnectorSummariesResponse
|
|
54
55
|
from ...types.delete_schema_object_response import DeleteSchemaObjectResponse
|
|
55
56
|
from ...types.connector_search_tables_response import ConnectorSearchTablesResponse
|
|
57
|
+
from ...types.connector_add_schema_object_response import ConnectorAddSchemaObjectResponse
|
|
56
58
|
from ...types.connector_list_with_snippets_response import ConnectorListWithSnippetsResponse
|
|
57
59
|
from ...types.connector_get_clarification_requests_response import ConnectorGetClarificationRequestsResponse
|
|
58
60
|
|
|
@@ -270,6 +272,177 @@ class ConnectorsResource(SyncAPIResource):
|
|
|
270
272
|
cast_to=NoneType,
|
|
271
273
|
)
|
|
272
274
|
|
|
275
|
+
@overload
|
|
276
|
+
def add_schema_object(
|
|
277
|
+
self,
|
|
278
|
+
connector_id: str,
|
|
279
|
+
*,
|
|
280
|
+
name: str,
|
|
281
|
+
type: Literal["database"],
|
|
282
|
+
description: Optional[str] | Omit = omit,
|
|
283
|
+
notes: Optional[str] | Omit = omit,
|
|
284
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
285
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
286
|
+
extra_headers: Headers | None = None,
|
|
287
|
+
extra_query: Query | None = None,
|
|
288
|
+
extra_body: Body | None = None,
|
|
289
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
290
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
291
|
+
"""
|
|
292
|
+
Args:
|
|
293
|
+
extra_headers: Send extra headers
|
|
294
|
+
|
|
295
|
+
extra_query: Add additional query parameters to the request
|
|
296
|
+
|
|
297
|
+
extra_body: Add additional JSON properties to the request
|
|
298
|
+
|
|
299
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
300
|
+
"""
|
|
301
|
+
...
|
|
302
|
+
|
|
303
|
+
@overload
|
|
304
|
+
def add_schema_object(
|
|
305
|
+
self,
|
|
306
|
+
connector_id: str,
|
|
307
|
+
*,
|
|
308
|
+
database_id: str,
|
|
309
|
+
name: str,
|
|
310
|
+
type: Literal["schema"],
|
|
311
|
+
description: Optional[str] | Omit = omit,
|
|
312
|
+
notes: Optional[str] | Omit = omit,
|
|
313
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
314
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
315
|
+
extra_headers: Headers | None = None,
|
|
316
|
+
extra_query: Query | None = None,
|
|
317
|
+
extra_body: Body | None = None,
|
|
318
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
319
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
320
|
+
"""
|
|
321
|
+
Args:
|
|
322
|
+
extra_headers: Send extra headers
|
|
323
|
+
|
|
324
|
+
extra_query: Add additional query parameters to the request
|
|
325
|
+
|
|
326
|
+
extra_body: Add additional JSON properties to the request
|
|
327
|
+
|
|
328
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
329
|
+
"""
|
|
330
|
+
...
|
|
331
|
+
|
|
332
|
+
@overload
|
|
333
|
+
def add_schema_object(
|
|
334
|
+
self,
|
|
335
|
+
connector_id: str,
|
|
336
|
+
*,
|
|
337
|
+
name: str,
|
|
338
|
+
schema_id: str,
|
|
339
|
+
type: Literal["table"],
|
|
340
|
+
description: Optional[str] | Omit = omit,
|
|
341
|
+
endpoint: Optional[str] | Omit = omit,
|
|
342
|
+
notes: Optional[str] | Omit = omit,
|
|
343
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
344
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
345
|
+
extra_headers: Headers | None = None,
|
|
346
|
+
extra_query: Query | None = None,
|
|
347
|
+
extra_body: Body | None = None,
|
|
348
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
349
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
350
|
+
"""
|
|
351
|
+
Args:
|
|
352
|
+
extra_headers: Send extra headers
|
|
353
|
+
|
|
354
|
+
extra_query: Add additional query parameters to the request
|
|
355
|
+
|
|
356
|
+
extra_body: Add additional JSON properties to the request
|
|
357
|
+
|
|
358
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
359
|
+
"""
|
|
360
|
+
...
|
|
361
|
+
|
|
362
|
+
@overload
|
|
363
|
+
def add_schema_object(
|
|
364
|
+
self,
|
|
365
|
+
connector_id: str,
|
|
366
|
+
*,
|
|
367
|
+
column_type: str,
|
|
368
|
+
name: str,
|
|
369
|
+
table_id: str,
|
|
370
|
+
type: Literal["column"],
|
|
371
|
+
notes: Optional[str] | Omit = omit,
|
|
372
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
373
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
374
|
+
extra_headers: Headers | None = None,
|
|
375
|
+
extra_query: Query | None = None,
|
|
376
|
+
extra_body: Body | None = None,
|
|
377
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
378
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
379
|
+
"""
|
|
380
|
+
Args:
|
|
381
|
+
extra_headers: Send extra headers
|
|
382
|
+
|
|
383
|
+
extra_query: Add additional query parameters to the request
|
|
384
|
+
|
|
385
|
+
extra_body: Add additional JSON properties to the request
|
|
386
|
+
|
|
387
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
388
|
+
"""
|
|
389
|
+
...
|
|
390
|
+
|
|
391
|
+
@required_args(
|
|
392
|
+
["name", "type"],
|
|
393
|
+
["database_id", "name", "type"],
|
|
394
|
+
["name", "schema_id", "type"],
|
|
395
|
+
["column_type", "name", "table_id", "type"],
|
|
396
|
+
)
|
|
397
|
+
def add_schema_object(
|
|
398
|
+
self,
|
|
399
|
+
connector_id: str,
|
|
400
|
+
*,
|
|
401
|
+
name: str,
|
|
402
|
+
type: Literal["database"] | Literal["schema"] | Literal["table"] | Literal["column"],
|
|
403
|
+
description: Optional[str] | Omit = omit,
|
|
404
|
+
notes: Optional[str] | Omit = omit,
|
|
405
|
+
database_id: str | Omit = omit,
|
|
406
|
+
schema_id: str | Omit = omit,
|
|
407
|
+
endpoint: Optional[str] | Omit = omit,
|
|
408
|
+
column_type: str | Omit = omit,
|
|
409
|
+
table_id: str | Omit = omit,
|
|
410
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
411
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
412
|
+
extra_headers: Headers | None = None,
|
|
413
|
+
extra_query: Query | None = None,
|
|
414
|
+
extra_body: Body | None = None,
|
|
415
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
416
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
417
|
+
if not connector_id:
|
|
418
|
+
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
|
|
419
|
+
return cast(
|
|
420
|
+
ConnectorAddSchemaObjectResponse,
|
|
421
|
+
self._post(
|
|
422
|
+
f"/connectors/{connector_id}/schema_object",
|
|
423
|
+
body=maybe_transform(
|
|
424
|
+
{
|
|
425
|
+
"name": name,
|
|
426
|
+
"type": type,
|
|
427
|
+
"description": description,
|
|
428
|
+
"notes": notes,
|
|
429
|
+
"database_id": database_id,
|
|
430
|
+
"schema_id": schema_id,
|
|
431
|
+
"endpoint": endpoint,
|
|
432
|
+
"column_type": column_type,
|
|
433
|
+
"table_id": table_id,
|
|
434
|
+
},
|
|
435
|
+
connector_add_schema_object_params.ConnectorAddSchemaObjectParams,
|
|
436
|
+
),
|
|
437
|
+
options=make_request_options(
|
|
438
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
439
|
+
),
|
|
440
|
+
cast_to=cast(
|
|
441
|
+
Any, ConnectorAddSchemaObjectResponse
|
|
442
|
+
), # Union types cannot be passed in as arguments in the type system
|
|
443
|
+
),
|
|
444
|
+
)
|
|
445
|
+
|
|
273
446
|
def create_secret(
|
|
274
447
|
self,
|
|
275
448
|
connector_id: str,
|
|
@@ -1207,6 +1380,177 @@ class AsyncConnectorsResource(AsyncAPIResource):
|
|
|
1207
1380
|
cast_to=NoneType,
|
|
1208
1381
|
)
|
|
1209
1382
|
|
|
1383
|
+
@overload
|
|
1384
|
+
async def add_schema_object(
|
|
1385
|
+
self,
|
|
1386
|
+
connector_id: str,
|
|
1387
|
+
*,
|
|
1388
|
+
name: str,
|
|
1389
|
+
type: Literal["database"],
|
|
1390
|
+
description: Optional[str] | Omit = omit,
|
|
1391
|
+
notes: Optional[str] | Omit = omit,
|
|
1392
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
1393
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
1394
|
+
extra_headers: Headers | None = None,
|
|
1395
|
+
extra_query: Query | None = None,
|
|
1396
|
+
extra_body: Body | None = None,
|
|
1397
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
1398
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
1399
|
+
"""
|
|
1400
|
+
Args:
|
|
1401
|
+
extra_headers: Send extra headers
|
|
1402
|
+
|
|
1403
|
+
extra_query: Add additional query parameters to the request
|
|
1404
|
+
|
|
1405
|
+
extra_body: Add additional JSON properties to the request
|
|
1406
|
+
|
|
1407
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
1408
|
+
"""
|
|
1409
|
+
...
|
|
1410
|
+
|
|
1411
|
+
@overload
|
|
1412
|
+
async def add_schema_object(
|
|
1413
|
+
self,
|
|
1414
|
+
connector_id: str,
|
|
1415
|
+
*,
|
|
1416
|
+
database_id: str,
|
|
1417
|
+
name: str,
|
|
1418
|
+
type: Literal["schema"],
|
|
1419
|
+
description: Optional[str] | Omit = omit,
|
|
1420
|
+
notes: Optional[str] | Omit = omit,
|
|
1421
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
1422
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
1423
|
+
extra_headers: Headers | None = None,
|
|
1424
|
+
extra_query: Query | None = None,
|
|
1425
|
+
extra_body: Body | None = None,
|
|
1426
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
1427
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
1428
|
+
"""
|
|
1429
|
+
Args:
|
|
1430
|
+
extra_headers: Send extra headers
|
|
1431
|
+
|
|
1432
|
+
extra_query: Add additional query parameters to the request
|
|
1433
|
+
|
|
1434
|
+
extra_body: Add additional JSON properties to the request
|
|
1435
|
+
|
|
1436
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
1437
|
+
"""
|
|
1438
|
+
...
|
|
1439
|
+
|
|
1440
|
+
@overload
|
|
1441
|
+
async def add_schema_object(
|
|
1442
|
+
self,
|
|
1443
|
+
connector_id: str,
|
|
1444
|
+
*,
|
|
1445
|
+
name: str,
|
|
1446
|
+
schema_id: str,
|
|
1447
|
+
type: Literal["table"],
|
|
1448
|
+
description: Optional[str] | Omit = omit,
|
|
1449
|
+
endpoint: Optional[str] | Omit = omit,
|
|
1450
|
+
notes: Optional[str] | Omit = omit,
|
|
1451
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
1452
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
1453
|
+
extra_headers: Headers | None = None,
|
|
1454
|
+
extra_query: Query | None = None,
|
|
1455
|
+
extra_body: Body | None = None,
|
|
1456
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
1457
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
1458
|
+
"""
|
|
1459
|
+
Args:
|
|
1460
|
+
extra_headers: Send extra headers
|
|
1461
|
+
|
|
1462
|
+
extra_query: Add additional query parameters to the request
|
|
1463
|
+
|
|
1464
|
+
extra_body: Add additional JSON properties to the request
|
|
1465
|
+
|
|
1466
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
1467
|
+
"""
|
|
1468
|
+
...
|
|
1469
|
+
|
|
1470
|
+
@overload
|
|
1471
|
+
async def add_schema_object(
|
|
1472
|
+
self,
|
|
1473
|
+
connector_id: str,
|
|
1474
|
+
*,
|
|
1475
|
+
column_type: str,
|
|
1476
|
+
name: str,
|
|
1477
|
+
table_id: str,
|
|
1478
|
+
type: Literal["column"],
|
|
1479
|
+
notes: Optional[str] | Omit = omit,
|
|
1480
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
1481
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
1482
|
+
extra_headers: Headers | None = None,
|
|
1483
|
+
extra_query: Query | None = None,
|
|
1484
|
+
extra_body: Body | None = None,
|
|
1485
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
1486
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
1487
|
+
"""
|
|
1488
|
+
Args:
|
|
1489
|
+
extra_headers: Send extra headers
|
|
1490
|
+
|
|
1491
|
+
extra_query: Add additional query parameters to the request
|
|
1492
|
+
|
|
1493
|
+
extra_body: Add additional JSON properties to the request
|
|
1494
|
+
|
|
1495
|
+
timeout: Override the client-level default timeout for this request, in seconds
|
|
1496
|
+
"""
|
|
1497
|
+
...
|
|
1498
|
+
|
|
1499
|
+
@required_args(
|
|
1500
|
+
["name", "type"],
|
|
1501
|
+
["database_id", "name", "type"],
|
|
1502
|
+
["name", "schema_id", "type"],
|
|
1503
|
+
["column_type", "name", "table_id", "type"],
|
|
1504
|
+
)
|
|
1505
|
+
async def add_schema_object(
|
|
1506
|
+
self,
|
|
1507
|
+
connector_id: str,
|
|
1508
|
+
*,
|
|
1509
|
+
name: str,
|
|
1510
|
+
type: Literal["database"] | Literal["schema"] | Literal["table"] | Literal["column"],
|
|
1511
|
+
description: Optional[str] | Omit = omit,
|
|
1512
|
+
notes: Optional[str] | Omit = omit,
|
|
1513
|
+
database_id: str | Omit = omit,
|
|
1514
|
+
schema_id: str | Omit = omit,
|
|
1515
|
+
endpoint: Optional[str] | Omit = omit,
|
|
1516
|
+
column_type: str | Omit = omit,
|
|
1517
|
+
table_id: str | Omit = omit,
|
|
1518
|
+
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
|
|
1519
|
+
# The extra values given here take precedence over values defined on the client or passed to this method.
|
|
1520
|
+
extra_headers: Headers | None = None,
|
|
1521
|
+
extra_query: Query | None = None,
|
|
1522
|
+
extra_body: Body | None = None,
|
|
1523
|
+
timeout: float | httpx.Timeout | None | NotGiven = not_given,
|
|
1524
|
+
) -> ConnectorAddSchemaObjectResponse:
|
|
1525
|
+
if not connector_id:
|
|
1526
|
+
raise ValueError(f"Expected a non-empty value for `connector_id` but received {connector_id!r}")
|
|
1527
|
+
return cast(
|
|
1528
|
+
ConnectorAddSchemaObjectResponse,
|
|
1529
|
+
await self._post(
|
|
1530
|
+
f"/connectors/{connector_id}/schema_object",
|
|
1531
|
+
body=await async_maybe_transform(
|
|
1532
|
+
{
|
|
1533
|
+
"name": name,
|
|
1534
|
+
"type": type,
|
|
1535
|
+
"description": description,
|
|
1536
|
+
"notes": notes,
|
|
1537
|
+
"database_id": database_id,
|
|
1538
|
+
"schema_id": schema_id,
|
|
1539
|
+
"endpoint": endpoint,
|
|
1540
|
+
"column_type": column_type,
|
|
1541
|
+
"table_id": table_id,
|
|
1542
|
+
},
|
|
1543
|
+
connector_add_schema_object_params.ConnectorAddSchemaObjectParams,
|
|
1544
|
+
),
|
|
1545
|
+
options=make_request_options(
|
|
1546
|
+
extra_headers=extra_headers, extra_query=extra_query, extra_body=extra_body, timeout=timeout
|
|
1547
|
+
),
|
|
1548
|
+
cast_to=cast(
|
|
1549
|
+
Any, ConnectorAddSchemaObjectResponse
|
|
1550
|
+
), # Union types cannot be passed in as arguments in the type system
|
|
1551
|
+
),
|
|
1552
|
+
)
|
|
1553
|
+
|
|
1210
1554
|
async def create_secret(
|
|
1211
1555
|
self,
|
|
1212
1556
|
connector_id: str,
|
|
@@ -1951,6 +2295,9 @@ class ConnectorsResourceWithRawResponse:
|
|
|
1951
2295
|
self.delete = to_raw_response_wrapper(
|
|
1952
2296
|
connectors.delete,
|
|
1953
2297
|
)
|
|
2298
|
+
self.add_schema_object = to_raw_response_wrapper(
|
|
2299
|
+
connectors.add_schema_object,
|
|
2300
|
+
)
|
|
1954
2301
|
self.create_secret = to_raw_response_wrapper(
|
|
1955
2302
|
connectors.create_secret,
|
|
1956
2303
|
)
|
|
@@ -2024,6 +2371,9 @@ class AsyncConnectorsResourceWithRawResponse:
|
|
|
2024
2371
|
self.delete = async_to_raw_response_wrapper(
|
|
2025
2372
|
connectors.delete,
|
|
2026
2373
|
)
|
|
2374
|
+
self.add_schema_object = async_to_raw_response_wrapper(
|
|
2375
|
+
connectors.add_schema_object,
|
|
2376
|
+
)
|
|
2027
2377
|
self.create_secret = async_to_raw_response_wrapper(
|
|
2028
2378
|
connectors.create_secret,
|
|
2029
2379
|
)
|
|
@@ -2097,6 +2447,9 @@ class ConnectorsResourceWithStreamingResponse:
|
|
|
2097
2447
|
self.delete = to_streamed_response_wrapper(
|
|
2098
2448
|
connectors.delete,
|
|
2099
2449
|
)
|
|
2450
|
+
self.add_schema_object = to_streamed_response_wrapper(
|
|
2451
|
+
connectors.add_schema_object,
|
|
2452
|
+
)
|
|
2100
2453
|
self.create_secret = to_streamed_response_wrapper(
|
|
2101
2454
|
connectors.create_secret,
|
|
2102
2455
|
)
|
|
@@ -2170,6 +2523,9 @@ class AsyncConnectorsResourceWithStreamingResponse:
|
|
|
2170
2523
|
self.delete = async_to_streamed_response_wrapper(
|
|
2171
2524
|
connectors.delete,
|
|
2172
2525
|
)
|
|
2526
|
+
self.add_schema_object = async_to_streamed_response_wrapper(
|
|
2527
|
+
connectors.add_schema_object,
|
|
2528
|
+
)
|
|
2173
2529
|
self.create_secret = async_to_streamed_response_wrapper(
|
|
2174
2530
|
connectors.create_secret,
|
|
2175
2531
|
)
|
structify/resources/polars.py
CHANGED
|
@@ -16,7 +16,6 @@ from structify.types.entity_param import EntityParam
|
|
|
16
16
|
from structify.types.property_type_param import PropertyTypeParam
|
|
17
17
|
from structify.types.dataset_create_params import Relationship as CreateRelationshipParam
|
|
18
18
|
from structify.types.knowledge_graph_param import KnowledgeGraphParam
|
|
19
|
-
from structify.types.dataset_view_table_response import Properties
|
|
20
19
|
|
|
21
20
|
from ..types import TableParam
|
|
22
21
|
from .._compat import cached_property
|
|
@@ -35,6 +34,17 @@ from ..types.structure_run_async_params import SourceWebWeb
|
|
|
35
34
|
__all__ = ["PolarsResource"]
|
|
36
35
|
|
|
37
36
|
MAX_PARALLEL_REQUESTS = 20
|
|
37
|
+
STRUCTIFY_JOB_ID_COLUMN = "structify_job_id"
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _collect_entities_with_job_ids(entities: Any) -> List[Dict[str, Any]]:
|
|
41
|
+
"""Collect entity properties with their first job_id."""
|
|
42
|
+
results: List[Dict[str, Any]] = []
|
|
43
|
+
for entity in entities:
|
|
44
|
+
row: Dict[str, Any] = dict(entity.properties)
|
|
45
|
+
row[STRUCTIFY_JOB_ID_COLUMN] = entity.job_ids[0] if entity.job_ids else None
|
|
46
|
+
results.append(row)
|
|
47
|
+
return results
|
|
38
48
|
|
|
39
49
|
|
|
40
50
|
class PolarsResource(SyncAPIResource):
|
|
@@ -164,8 +174,9 @@ class PolarsResource(SyncAPIResource):
|
|
|
164
174
|
# Get the node ID when the function is called, not when the batch is processed
|
|
165
175
|
node_id = get_node_id()
|
|
166
176
|
|
|
167
|
-
# Create the expected output schema
|
|
177
|
+
# Create the expected output schema with single job_id column
|
|
168
178
|
expected_schema = properties_to_schema(all_properties)
|
|
179
|
+
expected_schema[STRUCTIFY_JOB_ID_COLUMN] = pl.String
|
|
169
180
|
|
|
170
181
|
# Apply Structify enrich on the dataframe
|
|
171
182
|
def enhance_batch(batch_df: pl.DataFrame) -> pl.DataFrame:
|
|
@@ -249,11 +260,10 @@ class PolarsResource(SyncAPIResource):
|
|
|
249
260
|
# 3. Wait for all jobs to complete
|
|
250
261
|
title = f"Enriching {property_names} for {dataframe_name}"
|
|
251
262
|
self._client.jobs.wait_for_jobs(dataset_name=dataset_name, title=title, node_id=node_id)
|
|
252
|
-
# 4. Collect the results
|
|
253
|
-
results =
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
]
|
|
263
|
+
# 4. Collect the results with job_ids
|
|
264
|
+
results = _collect_entities_with_job_ids(
|
|
265
|
+
self._client.datasets.view_table(dataset=dataset_name, name=dataframe_name)
|
|
266
|
+
)
|
|
257
267
|
# 5. Return the results
|
|
258
268
|
return pl.DataFrame(results, schema=expected_schema)
|
|
259
269
|
|
|
@@ -296,6 +306,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
296
306
|
target_columns[col_name] = col_info.get("type", pl.String())
|
|
297
307
|
|
|
298
308
|
output_schema = _merge_schema_with_suffix(input_schema, target_columns, suffix=target_table_name)
|
|
309
|
+
output_schema[STRUCTIFY_JOB_ID_COLUMN] = pl.String
|
|
299
310
|
|
|
300
311
|
target_properties: list[Property] = [
|
|
301
312
|
Property(
|
|
@@ -412,6 +423,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
412
423
|
prop_name if prop_name not in input_schema else f"{prop_name}_{target_table_name}"
|
|
413
424
|
) # If the column already exists in the input schema, we need to suffix it with the target table name
|
|
414
425
|
result_row[eff] = target_entity.properties.get(prop_name)
|
|
426
|
+
result_row[STRUCTIFY_JOB_ID_COLUMN] = target_entity.job_ids[0] if target_entity.job_ids else None
|
|
415
427
|
result_rows.append(result_row)
|
|
416
428
|
|
|
417
429
|
# Handle source rows without relationships
|
|
@@ -422,6 +434,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
422
434
|
for prop_name in target_schema.keys():
|
|
423
435
|
eff = prop_name if prop_name not in input_schema else f"{prop_name}_{target_table_name}"
|
|
424
436
|
orphan_row[eff] = None
|
|
437
|
+
orphan_row[STRUCTIFY_JOB_ID_COLUMN] = None
|
|
425
438
|
result_rows.append(orphan_row)
|
|
426
439
|
|
|
427
440
|
if not result_rows:
|
|
@@ -440,14 +453,11 @@ class PolarsResource(SyncAPIResource):
|
|
|
440
453
|
dataframe_name: str,
|
|
441
454
|
dataframe_description: str,
|
|
442
455
|
use_proxy: bool = False,
|
|
443
|
-
include_job_ids: bool = False,
|
|
444
456
|
) -> LazyFrame:
|
|
445
457
|
"""
|
|
446
458
|
Enhance one or more columns of a `LazyFrame` directly from a URL.
|
|
447
459
|
|
|
448
|
-
|
|
449
|
-
output DataFrame with the Structify job id for each URL. The job id is not
|
|
450
|
-
stored in Structify.
|
|
460
|
+
Adds a `structify_job_id` column with the job id for each row.
|
|
451
461
|
"""
|
|
452
462
|
|
|
453
463
|
# Existing columns & their dtypes from the LazyFrame
|
|
@@ -475,8 +485,6 @@ class PolarsResource(SyncAPIResource):
|
|
|
475
485
|
for col_name, (dtype, desc) in new_columns_dict.items()
|
|
476
486
|
]
|
|
477
487
|
|
|
478
|
-
job_id_column: str | None = "job_id" if include_job_ids else None
|
|
479
|
-
|
|
480
488
|
all_properties = merge_column_properties(pre_existing_properties, new_column_properties)
|
|
481
489
|
|
|
482
490
|
dataset_name = f"enhance_{dataframe_name}_{uuid.uuid4().hex}"
|
|
@@ -504,10 +512,9 @@ class PolarsResource(SyncAPIResource):
|
|
|
504
512
|
# Get the node ID when the function is called, not when the batch is processed
|
|
505
513
|
node_id = get_node_id()
|
|
506
514
|
|
|
507
|
-
# Create the expected output schema
|
|
515
|
+
# Create the expected output schema with single job_id column
|
|
508
516
|
expected_schema = properties_to_schema(all_properties)
|
|
509
|
-
|
|
510
|
-
expected_schema[job_id_column] = pl.String
|
|
517
|
+
expected_schema[STRUCTIFY_JOB_ID_COLUMN] = pl.String
|
|
511
518
|
|
|
512
519
|
# Apply Structify scrape on the dataframe
|
|
513
520
|
def scrape_batch(batch_df: pl.DataFrame) -> pl.DataFrame:
|
|
@@ -537,8 +544,6 @@ class PolarsResource(SyncAPIResource):
|
|
|
537
544
|
entity_id_to_entity[entity_id] = entity
|
|
538
545
|
|
|
539
546
|
# 2. Run scrape jobs for each entity
|
|
540
|
-
job_ids_by_url: Dict[str, str] = {}
|
|
541
|
-
|
|
542
547
|
def scrape_entity_property(entity_id: str) -> None:
|
|
543
548
|
entity = entity_id_to_entity[entity_id]
|
|
544
549
|
url = entity["properties"].get(url_column)
|
|
@@ -549,7 +554,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
549
554
|
f"URL column {url_column} must be of string type, got {type(entity['properties'][url_column])}"
|
|
550
555
|
)
|
|
551
556
|
|
|
552
|
-
|
|
557
|
+
self._client.scrape.scrape(
|
|
553
558
|
dataset_name=dataset_name,
|
|
554
559
|
extraction_criteria=[
|
|
555
560
|
RequiredProperty(
|
|
@@ -566,8 +571,6 @@ class PolarsResource(SyncAPIResource):
|
|
|
566
571
|
use_proxy=use_proxy,
|
|
567
572
|
url=url,
|
|
568
573
|
)
|
|
569
|
-
if job_id_column is not None:
|
|
570
|
-
job_ids_by_url[url] = response.job_id
|
|
571
574
|
|
|
572
575
|
property_list = list(new_columns_dict.keys())
|
|
573
576
|
if len(property_list) == 1:
|
|
@@ -592,17 +595,10 @@ class PolarsResource(SyncAPIResource):
|
|
|
592
595
|
title = f"Scraping {property_names} for {dataframe_name}"
|
|
593
596
|
self._client.jobs.wait_for_jobs(dataset_name=dataset_name, title=title, node_id=node_id)
|
|
594
597
|
|
|
595
|
-
# 4. Collect the results
|
|
596
|
-
results
|
|
597
|
-
|
|
598
|
-
|
|
599
|
-
if job_id_column is not None:
|
|
600
|
-
url = properties.get(url_column)
|
|
601
|
-
if isinstance(url, str):
|
|
602
|
-
job_id = job_ids_by_url.get(url)
|
|
603
|
-
if job_id is not None:
|
|
604
|
-
properties[job_id_column] = job_id
|
|
605
|
-
results.append(properties)
|
|
598
|
+
# 4. Collect the results with job_id
|
|
599
|
+
results = _collect_entities_with_job_ids(
|
|
600
|
+
self._client.datasets.view_table(dataset=dataset_name, name=dataframe_name)
|
|
601
|
+
)
|
|
606
602
|
|
|
607
603
|
# 5. Return the results
|
|
608
604
|
return pl.DataFrame(results, schema=expected_schema)
|
|
@@ -657,6 +653,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
657
653
|
}
|
|
658
654
|
|
|
659
655
|
output_schema = _merge_schema_with_suffix(input_schema, scraped_columns, suffix=relationship["target_table"])
|
|
656
|
+
output_schema[STRUCTIFY_JOB_ID_COLUMN] = pl.String
|
|
660
657
|
|
|
661
658
|
properties: list[Property] = []
|
|
662
659
|
for col_name, col_info in scrape_schema.items():
|
|
@@ -758,6 +755,9 @@ class PolarsResource(SyncAPIResource):
|
|
|
758
755
|
result_row: dict[str, Any] = {
|
|
759
756
|
**scraped_entity.properties,
|
|
760
757
|
url_column: related_entity.properties[url_column],
|
|
758
|
+
STRUCTIFY_JOB_ID_COLUMN: scraped_entity.job_ids[0]
|
|
759
|
+
if scraped_entity.job_ids
|
|
760
|
+
else None,
|
|
761
761
|
}
|
|
762
762
|
result_rows.append(result_row)
|
|
763
763
|
offset += LIMIT
|
|
@@ -765,8 +765,11 @@ class PolarsResource(SyncAPIResource):
|
|
|
765
765
|
break
|
|
766
766
|
except Exception:
|
|
767
767
|
break
|
|
768
|
-
# Build scraped schema (pre-join, original names) incl. join column
|
|
769
|
-
scraped_schema = scraped_columns | {
|
|
768
|
+
# Build scraped schema (pre-join, original names) incl. join column and job_id
|
|
769
|
+
scraped_schema: Dict[str, pl.DataType] = scraped_columns | {
|
|
770
|
+
url_column: input_schema[url_column],
|
|
771
|
+
STRUCTIFY_JOB_ID_COLUMN: pl.String(),
|
|
772
|
+
}
|
|
770
773
|
|
|
771
774
|
# Fill missing columns in scraped results
|
|
772
775
|
for result_row in result_rows:
|
|
@@ -839,6 +842,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
839
842
|
polars_schema = pl.Schema(
|
|
840
843
|
[(path_column, pl.String())]
|
|
841
844
|
+ [(col_name, col_info.get("type", pl.String())) for col_name, col_info in schema.items()]
|
|
845
|
+
+ [(STRUCTIFY_JOB_ID_COLUMN, pl.String())]
|
|
842
846
|
)
|
|
843
847
|
|
|
844
848
|
assert path_column in document_paths.collect_schema(), (
|
|
@@ -931,9 +935,15 @@ class PolarsResource(SyncAPIResource):
|
|
|
931
935
|
|
|
932
936
|
# Get all of the entities with their job_ids
|
|
933
937
|
entities = self._client.datasets.view_table(dataset=dataset_name, name=table_name)
|
|
934
|
-
structured_results: List[Dict[str, Any]] = [
|
|
935
|
-
|
|
936
|
-
|
|
938
|
+
structured_results: List[Dict[str, Any]] = []
|
|
939
|
+
for entity in entities:
|
|
940
|
+
job_id = entity.job_ids[0] if entity.job_ids else None
|
|
941
|
+
result_row: Dict[str, Any] = {
|
|
942
|
+
**entity.properties,
|
|
943
|
+
path_column: job_to_pdf_path.get(job_id) if job_id else None,
|
|
944
|
+
STRUCTIFY_JOB_ID_COLUMN: job_id,
|
|
945
|
+
}
|
|
946
|
+
structured_results.append(result_row)
|
|
937
947
|
|
|
938
948
|
# Ensure all columns are present with None for missing values
|
|
939
949
|
for result_row in structured_results:
|
|
@@ -986,6 +996,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
986
996
|
all_properties = existing_properties + [new_property]
|
|
987
997
|
|
|
988
998
|
expected_schema = properties_to_schema(all_properties)
|
|
999
|
+
expected_schema[STRUCTIFY_JOB_ID_COLUMN] = pl.String
|
|
989
1000
|
if collected_df.is_empty():
|
|
990
1001
|
return pl.DataFrame(schema=expected_schema).lazy()
|
|
991
1002
|
|
|
@@ -1024,12 +1035,12 @@ class PolarsResource(SyncAPIResource):
|
|
|
1024
1035
|
node_id=node_id,
|
|
1025
1036
|
)
|
|
1026
1037
|
|
|
1027
|
-
# 3. Collect the results
|
|
1038
|
+
# 3. Collect the results with job_ids
|
|
1028
1039
|
title = f"Tagging {new_property_name} for {dataframe_name}"
|
|
1029
1040
|
self._client.jobs.wait_for_jobs(dataset_name=dataset_name, title=title, node_id=node_id)
|
|
1030
|
-
results =
|
|
1031
|
-
|
|
1032
|
-
|
|
1041
|
+
results = _collect_entities_with_job_ids(
|
|
1042
|
+
self._client.datasets.view_table(dataset=dataset_name, name=dataframe_name)
|
|
1043
|
+
)
|
|
1033
1044
|
|
|
1034
1045
|
# 4. Return the results
|
|
1035
1046
|
return pl.DataFrame(results, schema=expected_schema).lazy()
|
|
@@ -1157,6 +1168,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
1157
1168
|
"idx1": [match.target_entity_index for match in matches],
|
|
1158
1169
|
"idx2": [match.source_entity_index for match in matches],
|
|
1159
1170
|
"match_reason": [match.match_reason for match in matches],
|
|
1171
|
+
STRUCTIFY_JOB_ID_COLUMN: [match.job_id for match in matches],
|
|
1160
1172
|
}
|
|
1161
1173
|
else:
|
|
1162
1174
|
# No swap, return as normal
|
|
@@ -1164,6 +1176,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
1164
1176
|
"idx1": [match.source_entity_index for match in matches],
|
|
1165
1177
|
"idx2": [match.target_entity_index for match in matches],
|
|
1166
1178
|
"match_reason": [match.match_reason for match in matches],
|
|
1179
|
+
STRUCTIFY_JOB_ID_COLUMN: [match.job_id for match in matches],
|
|
1167
1180
|
}
|
|
1168
1181
|
|
|
1169
1182
|
return pl.DataFrame(matches_in_schema).lazy()
|
|
@@ -1182,7 +1195,7 @@ class PolarsResource(SyncAPIResource):
|
|
|
1182
1195
|
"/entity/upload_parquet",
|
|
1183
1196
|
params={"dataset": dataset_name, "table_name": table_name},
|
|
1184
1197
|
files={"file": ("data.parquet", parquet_bytes.getvalue(), "application/octet-stream")},
|
|
1185
|
-
headers=
|
|
1198
|
+
headers=self._client.auth_headers,
|
|
1186
1199
|
)
|
|
1187
1200
|
response.raise_for_status()
|
|
1188
1201
|
|