unstructured-ingest 1.0.4__py3-none-any.whl → 1.0.5__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.0.4" # pragma: no cover
1
+ __version__ = "1.0.5" # pragma: no cover
@@ -32,6 +32,28 @@ class FileDataSourceMetadata(BaseModel):
32
32
  permissions_data: Union[list[dict[str, Any]], dict[str, Any], None] = None
33
33
  filesize_bytes: Optional[int] = None
34
34
 
35
+ @field_validator("permissions_data", mode="before")
36
+ @classmethod
37
+ def coerce_permissions_data(cls, v: Any) -> Any:
38
+ if isinstance(v, dict):
39
+ # Temporarily convert dict to list for validation
40
+ return [v]
41
+ return v
42
+
43
+ @field_validator("permissions_data", mode="after")
44
+ @classmethod
45
+ def restore_dict_permissions_data(
46
+ cls, v: Optional[list[dict[str, Any]]]
47
+ ) -> Union[list[dict[str, Any]], dict[str, Any], None]:
48
+ if (
49
+ isinstance(v, list)
50
+ and len(v) == 1
51
+ and isinstance(v[0], dict)
52
+ and any(isinstance(val, dict) for val in v[0].values())
53
+ ):
54
+ return v[0]
55
+ return v
56
+
35
57
 
36
58
  class FileData(BaseModel):
37
59
  identifier: str
@@ -352,6 +352,7 @@ class ConfluenceDownloader(Downloader):
352
352
  def _get_permissions_for_space(self, space_id: int) -> Optional[List[dict]]:
353
353
  if space_id in self._permissions_cache:
354
354
  self._permissions_cache.move_to_end(space_id) # mark recent use
355
+ logger.debug(f"Retrieved cached permissions for space {space_id}")
355
356
  return self._permissions_cache[space_id]
356
357
  else:
357
358
  with self.connection_config.get_client() as client:
@@ -371,6 +372,7 @@ class ConfluenceDownloader(Downloader):
371
372
  self._permissions_cache.popitem(last=False) # LRU/FIFO eviction
372
373
  self._permissions_cache[space_id] = space_permissions
373
374
 
375
+ logger.debug(f"Retrieved permissions for space {space_id}")
374
376
  return space_permissions
375
377
  except Exception as e:
376
378
  logger.debug(f"Could not retrieve permissions for space {space_id}: {e}")
@@ -387,6 +389,7 @@ class ConfluenceDownloader(Downloader):
387
389
  logger.debug(f"Could not retrieve permissions for doc {doc_id}: {e}")
388
390
  return None
389
391
 
392
+ logger.debug(f"normalized permissions generated: {parsed_permissions_dict}")
390
393
  return parsed_permissions_dict
391
394
 
392
395
  def run(self, file_data: FileData, **kwargs) -> download_responses:
@@ -409,8 +409,9 @@ class GoogleDriveIndexer(Indexer):
409
409
  d.metadata.record_locator["drive_id"]: object_id
410
410
  return data
411
411
 
412
- def extract_permissions(self, permissions: list[dict]) -> dict:
412
+ def extract_permissions(self, permissions: Optional[list[dict]]) -> dict:
413
413
  if not permissions:
414
+ logger.debug("no permissions found")
414
415
  return {}
415
416
 
416
417
  # https://developers.google.com/workspace/drive/api/guides/ref-roles
@@ -442,6 +443,7 @@ class GoogleDriveIndexer(Indexer):
442
443
  for key in role_dict:
443
444
  role_dict[key] = sorted(role_dict[key])
444
445
 
446
+ logger.debug(f"normalized permissions generated: {normalized_permissions}")
445
447
  return normalized_permissions
446
448
 
447
449
  def run(self, **kwargs: Any) -> Generator[FileData, None, None]:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.4
3
+ Version: 1.0.5
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=MizK8W2VY6aXUudG1jVogTj7GJ2uwduw5iryFPwi0tM,42
2
+ unstructured_ingest/__version__.py,sha256=64jEAZw6WkJoKXj1K79a59MykYVOTTwLRMWIdobOAH0,42
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -19,7 +19,7 @@ unstructured_ingest/cli/utils/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5N
19
19
  unstructured_ingest/cli/utils/click.py,sha256=1_eJgrwS2DFBl1jZPLsj1vgVgR7agFBIEBe4A_n7mH4,7827
20
20
  unstructured_ingest/cli/utils/model_conversion.py,sha256=hMjAfOVvO1RXTDsw26mmersdncvddkb_rP9JTEgVVCw,7649
21
21
  unstructured_ingest/data_types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
22
- unstructured_ingest/data_types/file_data.py,sha256=E-09hkI4ms4yj-g_aQPIrnm0kbiZLwukCnbwp6OpobQ,3859
22
+ unstructured_ingest/data_types/file_data.py,sha256=7JwwbcgVQdIwCKxrDLUYvJp1f-bzaiGQD8ETr-Ywph8,4571
23
23
  unstructured_ingest/embed/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
24
  unstructured_ingest/embed/azure_openai.py,sha256=_-I-nwd-wdCiKkSdYBL4UKrTZ2UPWsM_0T69fcObs_I,1707
25
25
  unstructured_ingest/embed/bedrock.py,sha256=t58V_QQjWPO62CTuP0aLFMDisPeXpxG2xSFGUhN-JvI,7726
@@ -65,13 +65,13 @@ unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ik
65
65
  unstructured_ingest/processes/connectors/astradb.py,sha256=Ob9wQgDxa6BXDPZBOqooNKQgvjIZcMwIe4fW3VlI7h8,18929
66
66
  unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
67
67
  unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
68
- unstructured_ingest/processes/connectors/confluence.py,sha256=7uRgmpX3NcVzA2V7VcngzjMQ69pS0J2wu6cbMp7AFA0,20739
68
+ unstructured_ingest/processes/connectors/confluence.py,sha256=VAHGs_8HPYgBN8s8YwM7-LdzQ5MI_UEWXcMAMdpWLYk,20983
69
69
  unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
70
70
  unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
71
71
  unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
72
72
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
73
73
  unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
74
- unstructured_ingest/processes/connectors/google_drive.py,sha256=mcplAPbQ_A_MIsIXWc7K0YtEXMIMmluefsrzddJQNFw,21674
74
+ unstructured_ingest/processes/connectors/google_drive.py,sha256=Nu6AA0yDCrtoSq5hqvpKJFNRFF0JcxHjZtDVbLay33Q,21817
75
75
  unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
76
76
  unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
77
77
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
@@ -230,8 +230,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
230
230
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
231
231
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
232
232
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
233
- unstructured_ingest-1.0.4.dist-info/METADATA,sha256=ZrV3WL4OOzjU53IKTL59o3dr5UIDRrSGewp-tFGfSF8,8719
234
- unstructured_ingest-1.0.4.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
235
- unstructured_ingest-1.0.4.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
236
- unstructured_ingest-1.0.4.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
237
- unstructured_ingest-1.0.4.dist-info/RECORD,,
233
+ unstructured_ingest-1.0.5.dist-info/METADATA,sha256=D8wUuNkaBZMshLsm-S5kcLGgsJOv-xO6naAFJM2eVqI,8719
234
+ unstructured_ingest-1.0.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
235
+ unstructured_ingest-1.0.5.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
236
+ unstructured_ingest-1.0.5.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
237
+ unstructured_ingest-1.0.5.dist-info/RECORD,,