unstructured-ingest 1.0.33__py3-none-any.whl → 1.0.35__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/processes/connectors/confluence.py +31 -9
- unstructured_ingest/processes/connectors/databricks/volumes.py +3 -2
- unstructured_ingest/processes/connectors/fsspec/fsspec.py +4 -3
- unstructured_ingest/processes/connectors/onedrive.py +5 -6
- {unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/METADATA +1 -1
- {unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/RECORD +10 -10
- {unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.0.33" # pragma: no cover
|
|
1
|
+
__version__ = "1.0.35" # pragma: no cover
|
|
@@ -12,6 +12,7 @@ from unstructured_ingest.data_types.file_data import (
|
|
|
12
12
|
SourceIdentifiers,
|
|
13
13
|
)
|
|
14
14
|
from unstructured_ingest.error import SourceConnectionError
|
|
15
|
+
from unstructured_ingest.errors_v2 import UserAuthError, UserError
|
|
15
16
|
from unstructured_ingest.interfaces import (
|
|
16
17
|
AccessConfig,
|
|
17
18
|
ConnectionConfig,
|
|
@@ -96,7 +97,7 @@ class ConfluenceConnectionConfig(ConnectionConfig):
|
|
|
96
97
|
|
|
97
98
|
@requires_dependencies(["atlassian"], extras="confluence")
|
|
98
99
|
@contextmanager
|
|
99
|
-
def get_client(self) -> "Confluence":
|
|
100
|
+
def get_client(self) -> Generator["Confluence", None, None]:
|
|
100
101
|
from atlassian import Confluence
|
|
101
102
|
|
|
102
103
|
access_configs = self.access_config.get_secret_value()
|
|
@@ -126,15 +127,36 @@ class ConfluenceIndexer(Indexer):
|
|
|
126
127
|
|
|
127
128
|
def precheck(self) -> bool:
|
|
128
129
|
try:
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
130
|
+
self.connection_config.get_client()
|
|
131
|
+
except Exception as e:
|
|
132
|
+
logger.exception(f"Failed to connect to Confluence: {e}")
|
|
133
|
+
raise UserAuthError(f"Failed to connect to Confluence: {e}")
|
|
134
|
+
|
|
135
|
+
with self.connection_config.get_client() as client:
|
|
136
|
+
# opportunistically check the first space in list of all spaces
|
|
137
|
+
try:
|
|
132
138
|
client.get_all_spaces(limit=1)
|
|
139
|
+
except Exception as e:
|
|
140
|
+
logger.exception(f"Failed to connect to find any Confluence space: {e}")
|
|
141
|
+
raise UserError(f"Failed to connect to find any Confluence space: {e}")
|
|
142
|
+
|
|
133
143
|
logger.info("Connection to Confluence successful.")
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
144
|
+
|
|
145
|
+
# If specific spaces are provided, check if we can access them
|
|
146
|
+
errors = []
|
|
147
|
+
|
|
148
|
+
if self.index_config.spaces:
|
|
149
|
+
for space_key in self.index_config.spaces:
|
|
150
|
+
try:
|
|
151
|
+
client.get_space(space_key)
|
|
152
|
+
except Exception as e:
|
|
153
|
+
logger.exception(f"Failed to connect to Confluence: {e}")
|
|
154
|
+
errors.append(f"Failed to connect to '{space_key}' space, cause: '{e}'")
|
|
155
|
+
|
|
156
|
+
if errors:
|
|
157
|
+
raise UserError("\n".join(errors))
|
|
158
|
+
|
|
159
|
+
return True
|
|
138
160
|
|
|
139
161
|
def _get_space_ids_and_keys(self) -> List[Tuple[str, int]]:
|
|
140
162
|
"""
|
|
@@ -406,7 +428,7 @@ class ConfluenceDownloader(Downloader):
|
|
|
406
428
|
expand="history.lastUpdated,version,body.view",
|
|
407
429
|
)
|
|
408
430
|
except Exception as e:
|
|
409
|
-
logger.
|
|
431
|
+
logger.exception(f"Failed to retrieve page with ID {doc_id}: {e}")
|
|
410
432
|
raise SourceConnectionError(f"Failed to retrieve page with ID {doc_id}: {e}")
|
|
411
433
|
|
|
412
434
|
if not page:
|
|
@@ -196,9 +196,10 @@ class DatabricksVolumesUploader(Uploader, ABC):
|
|
|
196
196
|
connection_config: DatabricksVolumesConnectionConfig
|
|
197
197
|
|
|
198
198
|
def get_output_path(self, file_data: FileData) -> str:
|
|
199
|
-
if file_data.source_identifiers.
|
|
199
|
+
if file_data.source_identifiers.relative_path:
|
|
200
200
|
return os.path.join(
|
|
201
|
-
self.upload_config.path,
|
|
201
|
+
self.upload_config.path,
|
|
202
|
+
f"{file_data.source_identifiers.relative_path.lstrip('/')}.json",
|
|
202
203
|
)
|
|
203
204
|
else:
|
|
204
205
|
return os.path.join(
|
|
@@ -343,9 +343,10 @@ class FsspecUploader(Uploader):
|
|
|
343
343
|
raise self.wrap_error(e=e)
|
|
344
344
|
|
|
345
345
|
def get_upload_path(self, file_data: FileData) -> Path:
|
|
346
|
-
upload_path =
|
|
347
|
-
self.upload_config.path_without_protocol
|
|
348
|
-
|
|
346
|
+
upload_path = (
|
|
347
|
+
Path(self.upload_config.path_without_protocol)
|
|
348
|
+
/ file_data.source_identifiers.relative_path.lstrip("/")
|
|
349
|
+
)
|
|
349
350
|
updated_upload_path = upload_path.parent / f"{upload_path.name}.json"
|
|
350
351
|
return updated_upload_path
|
|
351
352
|
|
|
@@ -369,15 +369,14 @@ class OnedriveUploader(Uploader):
|
|
|
369
369
|
|
|
370
370
|
# Use the remote_url from upload_config as the base destination folder
|
|
371
371
|
base_destination_folder = self.upload_config.url
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
# Combine the base destination folder with the file's full path
|
|
372
|
+
# Use the file's relative path to maintain directory structure, if needed
|
|
373
|
+
if file_data.source_identifiers and file_data.source_identifiers.relative_path:
|
|
374
|
+
# Combine the base destination folder with the file's relative path
|
|
376
375
|
destination_path = Path(base_destination_folder) / Path(
|
|
377
|
-
f"{file_data.source_identifiers.
|
|
376
|
+
f"{file_data.source_identifiers.relative_path}.json"
|
|
378
377
|
)
|
|
379
378
|
else:
|
|
380
|
-
# If no
|
|
379
|
+
# If no relative path is provided, upload directly to the base destination folder
|
|
381
380
|
destination_path = Path(base_destination_folder) / f"{path.name}.json"
|
|
382
381
|
|
|
383
382
|
destination_folder = destination_path.parent
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=b3qTfBCIVt9b6BKAiKwjS-S1JYzG3JrNJz133p_CIH8,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
@@ -66,7 +66,7 @@ unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ik
|
|
|
66
66
|
unstructured_ingest/processes/connectors/astradb.py,sha256=Ob9wQgDxa6BXDPZBOqooNKQgvjIZcMwIe4fW3VlI7h8,18929
|
|
67
67
|
unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
|
|
68
68
|
unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
|
|
69
|
-
unstructured_ingest/processes/connectors/confluence.py,sha256=
|
|
69
|
+
unstructured_ingest/processes/connectors/confluence.py,sha256=C62LVwZYk7H8RfiPb0mbxig2osW5u7KvHIlz4qOJU-0,21954
|
|
70
70
|
unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
|
|
71
71
|
unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
|
|
72
72
|
unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
|
|
@@ -79,7 +79,7 @@ unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9
|
|
|
79
79
|
unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
|
|
80
80
|
unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
|
|
81
81
|
unstructured_ingest/processes/connectors/neo4j.py,sha256=ztxvI9KY8RF5kYUuMGSzzN5mz7Fu_4Ai9P7dqCpJLc0,20267
|
|
82
|
-
unstructured_ingest/processes/connectors/onedrive.py,sha256=
|
|
82
|
+
unstructured_ingest/processes/connectors/onedrive.py,sha256=JIADpc31PI9Yzr0raF6bSqzes2jhfcniUzew1aKVWeI,19305
|
|
83
83
|
unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
|
|
84
84
|
unstructured_ingest/processes/connectors/pinecone.py,sha256=pSREUNsQqel6q1EFZsFWelg-uZgGubQY5m_6nVnBFKs,15090
|
|
85
85
|
unstructured_ingest/processes/connectors/redisdb.py,sha256=rTihbfv0Mlk1eo5Izn-JXRu5Ad5C-KD58nSqeKsaZJ8,8024
|
|
@@ -92,7 +92,7 @@ unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-
|
|
|
92
92
|
unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=8a9HTcRWA6IuswSD632b_uZSO6Dax_0rUYnflqktcek,226
|
|
93
93
|
unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
|
|
94
94
|
unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
|
|
95
|
-
unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=
|
|
95
|
+
unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=yT5JFbVzAEOJsKjfGH8KG3eQfKaTNFEsg_FVDPVK7Xs,8271
|
|
96
96
|
unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
|
|
97
97
|
unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
|
|
98
98
|
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
|
|
@@ -109,7 +109,7 @@ unstructured_ingest/processes/connectors/fsspec/__init__.py,sha256=3HTdw4L4mdN4W
|
|
|
109
109
|
unstructured_ingest/processes/connectors/fsspec/azure.py,sha256=31VNiG5YnXfhrFX7QJ2O1ubeWHxbe1sYVIztefbscAQ,7148
|
|
110
110
|
unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4ZpI1fKTsJuIDfXuAzx_a4FzxG4,5873
|
|
111
111
|
unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
|
|
112
|
-
unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=
|
|
112
|
+
unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=d_ig69_tuSWczwPxzZue1xTYMYqYqUe-dg1jMEjC8M0,14481
|
|
113
113
|
unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
|
|
114
114
|
unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
|
|
115
115
|
unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
|
|
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
|
|
|
231
231
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
232
232
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
233
233
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
234
|
-
unstructured_ingest-1.0.
|
|
235
|
-
unstructured_ingest-1.0.
|
|
236
|
-
unstructured_ingest-1.0.
|
|
237
|
-
unstructured_ingest-1.0.
|
|
238
|
-
unstructured_ingest-1.0.
|
|
234
|
+
unstructured_ingest-1.0.35.dist-info/METADATA,sha256=rpeaT-RpY6IFgDR7tIIxylJL1t-geRuhk3QKQq6JSDY,8747
|
|
235
|
+
unstructured_ingest-1.0.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
236
|
+
unstructured_ingest-1.0.35.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
237
|
+
unstructured_ingest-1.0.35.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
238
|
+
unstructured_ingest-1.0.35.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.33.dist-info → unstructured_ingest-1.0.35.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|