unstructured-ingest 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/data_types/file_data.py +22 -0
- unstructured_ingest/processes/connectors/confluence.py +3 -0
- unstructured_ingest/processes/connectors/google_drive.py +3 -1
- {unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/METADATA +1 -1
- {unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/RECORD +9 -9
- {unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.0.4" # pragma: no cover
|
|
1
|
+
__version__ = "1.0.5" # pragma: no cover
|
|
@@ -32,6 +32,28 @@ class FileDataSourceMetadata(BaseModel):
|
|
|
32
32
|
permissions_data: Union[list[dict[str, Any]], dict[str, Any], None] = None
|
|
33
33
|
filesize_bytes: Optional[int] = None
|
|
34
34
|
|
|
35
|
+
@field_validator("permissions_data", mode="before")
|
|
36
|
+
@classmethod
|
|
37
|
+
def coerce_permissions_data(cls, v: Any) -> Any:
|
|
38
|
+
if isinstance(v, dict):
|
|
39
|
+
# Temporarily convert dict to list for validation
|
|
40
|
+
return [v]
|
|
41
|
+
return v
|
|
42
|
+
|
|
43
|
+
@field_validator("permissions_data", mode="after")
|
|
44
|
+
@classmethod
|
|
45
|
+
def restore_dict_permissions_data(
|
|
46
|
+
cls, v: Optional[list[dict[str, Any]]]
|
|
47
|
+
) -> Union[list[dict[str, Any]], dict[str, Any], None]:
|
|
48
|
+
if (
|
|
49
|
+
isinstance(v, list)
|
|
50
|
+
and len(v) == 1
|
|
51
|
+
and isinstance(v[0], dict)
|
|
52
|
+
and any(isinstance(val, dict) for val in v[0].values())
|
|
53
|
+
):
|
|
54
|
+
return v[0]
|
|
55
|
+
return v
|
|
56
|
+
|
|
35
57
|
|
|
36
58
|
class FileData(BaseModel):
|
|
37
59
|
identifier: str
|
|
@@ -352,6 +352,7 @@ class ConfluenceDownloader(Downloader):
|
|
|
352
352
|
def _get_permissions_for_space(self, space_id: int) -> Optional[List[dict]]:
|
|
353
353
|
if space_id in self._permissions_cache:
|
|
354
354
|
self._permissions_cache.move_to_end(space_id) # mark recent use
|
|
355
|
+
logger.debug(f"Retrieved cached permissions for space {space_id}")
|
|
355
356
|
return self._permissions_cache[space_id]
|
|
356
357
|
else:
|
|
357
358
|
with self.connection_config.get_client() as client:
|
|
@@ -371,6 +372,7 @@ class ConfluenceDownloader(Downloader):
|
|
|
371
372
|
self._permissions_cache.popitem(last=False) # LRU/FIFO eviction
|
|
372
373
|
self._permissions_cache[space_id] = space_permissions
|
|
373
374
|
|
|
375
|
+
logger.debug(f"Retrieved permissions for space {space_id}")
|
|
374
376
|
return space_permissions
|
|
375
377
|
except Exception as e:
|
|
376
378
|
logger.debug(f"Could not retrieve permissions for space {space_id}: {e}")
|
|
@@ -387,6 +389,7 @@ class ConfluenceDownloader(Downloader):
|
|
|
387
389
|
logger.debug(f"Could not retrieve permissions for doc {doc_id}: {e}")
|
|
388
390
|
return None
|
|
389
391
|
|
|
392
|
+
logger.debug(f"normalized permissions generated: {parsed_permissions_dict}")
|
|
390
393
|
return parsed_permissions_dict
|
|
391
394
|
|
|
392
395
|
def run(self, file_data: FileData, **kwargs) -> download_responses:
|
|
@@ -409,8 +409,9 @@ class GoogleDriveIndexer(Indexer):
|
|
|
409
409
|
d.metadata.record_locator["drive_id"]: object_id
|
|
410
410
|
return data
|
|
411
411
|
|
|
412
|
-
def extract_permissions(self, permissions: list[dict]) -> dict:
|
|
412
|
+
def extract_permissions(self, permissions: Optional[list[dict]]) -> dict:
|
|
413
413
|
if not permissions:
|
|
414
|
+
logger.debug("no permissions found")
|
|
414
415
|
return {}
|
|
415
416
|
|
|
416
417
|
# https://developers.google.com/workspace/drive/api/guides/ref-roles
|
|
@@ -442,6 +443,7 @@ class GoogleDriveIndexer(Indexer):
|
|
|
442
443
|
for key in role_dict:
|
|
443
444
|
role_dict[key] = sorted(role_dict[key])
|
|
444
445
|
|
|
446
|
+
logger.debug(f"normalized permissions generated: {normalized_permissions}")
|
|
445
447
|
return normalized_permissions
|
|
446
448
|
|
|
447
449
|
def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=64jEAZw6WkJoKXj1K79a59MykYVOTTwLRMWIdobOAH0,42
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
@@ -19,7 +19,7 @@ unstructured_ingest/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
|
|
|
19
19
|
unstructured_ingest/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
|
|
20
20
|
unstructured_ingest/cli/utils/model_conversion.py,sha256=hMjAfOVvO1RXTDsw26mmersdncvddkb_rP9JTEgVVCw,7649
|
|
21
21
|
unstructured_ingest/data_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
22
|
-
unstructured_ingest/data_types/file_data.py,sha256=
|
|
22
|
+
unstructured_ingest/data_types/file_data.py,sha256=7JwwbcgVQdIwCKxrDLUYvJp1f-bzaiGQD8ETr-Ywph8,4571
|
|
23
23
|
unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
24
|
unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM_0T69fcObs_I,1707
|
|
25
25
|
unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
|
|
@@ -65,13 +65,13 @@ unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ik
|
|
|
65
65
|
unstructured_ingest/processes/connectors/astradb.py,sha256=Ob9wQgDxa6BXDPZBOqooNKQgvjIZcMwIe4fW3VlI7h8,18929
|
|
66
66
|
unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
|
|
67
67
|
unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
|
|
68
|
-
unstructured_ingest/processes/connectors/confluence.py,sha256=
|
|
68
|
+
unstructured_ingest/processes/connectors/confluence.py,sha256=VAHGs_8HPYgBN8s8YwM7-LdzQ5MI_UEWXcMAMdpWLYk,20983
|
|
69
69
|
unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
|
|
70
70
|
unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
|
|
71
71
|
unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
|
|
72
72
|
unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
|
|
73
73
|
unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
|
|
74
|
-
unstructured_ingest/processes/connectors/google_drive.py,sha256=
|
|
74
|
+
unstructured_ingest/processes/connectors/google_drive.py,sha256=Nu6AA0yDCrtoSq5hqvpKJFNRFF0JcxHjZtDVbLay33Q,21817
|
|
75
75
|
unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
|
|
76
76
|
unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
|
|
77
77
|
unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
|
|
@@ -230,8 +230,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
|
|
|
230
230
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
231
231
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
232
232
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
233
|
-
unstructured_ingest-1.0.
|
|
234
|
-
unstructured_ingest-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
235
|
-
unstructured_ingest-1.0.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
236
|
-
unstructured_ingest-1.0.4.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
237
|
-
unstructured_ingest-1.0.4.dist-info/RECORD,,
|
|
233
|
+
unstructured_ingest-1.0.5.dist-info/METADATA,sha256=D8wUuNkaBZMshLsm-S5kcLGgsJOv-xO6naAFJM2eVqI,8719
|
|
234
|
+
unstructured_ingest-1.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
235
|
+
unstructured_ingest-1.0.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
236
|
+
unstructured_ingest-1.0.5.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
237
|
+
unstructured_ingest-1.0.5.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.4.dist-info → unstructured_ingest-1.0.5.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|