unstructured-ingest 1.0.33__py3-none-any.whl → 1.0.35__py3-none-any.whl

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.0.33" # pragma: no cover
1
+ __version__ = "1.0.35" # pragma: no cover
@@ -12,6 +12,7 @@ from unstructured_ingest.data_types.file_data import (
12
12
  SourceIdentifiers,
13
13
  )
14
14
  from unstructured_ingest.error import SourceConnectionError
15
+ from unstructured_ingest.errors_v2 import UserAuthError, UserError
15
16
  from unstructured_ingest.interfaces import (
16
17
  AccessConfig,
17
18
  ConnectionConfig,
@@ -96,7 +97,7 @@ class ConfluenceConnectionConfig(ConnectionConfig):
96
97
 
97
98
  @requires_dependencies(["atlassian"], extras="confluence")
98
99
  @contextmanager
99
- def get_client(self) -> "Confluence":
100
+ def get_client(self) -> Generator["Confluence", None, None]:
100
101
  from atlassian import Confluence
101
102
 
102
103
  access_configs = self.access_config.get_secret_value()
@@ -126,15 +127,36 @@ class ConfluenceIndexer(Indexer):
126
127
 
127
128
  def precheck(self) -> bool:
128
129
  try:
129
- # Attempt to retrieve a list of spaces with limit=1.
130
- # This should only succeed if all creds are valid
131
- with self.connection_config.get_client() as client:
130
+ self.connection_config.get_client()
131
+ except Exception as e:
132
+ logger.exception(f"Failed to connect to Confluence: {e}")
133
+ raise UserAuthError(f"Failed to connect to Confluence: {e}")
134
+
135
+ with self.connection_config.get_client() as client:
136
+ # opportunistically check the first space in list of all spaces
137
+ try:
132
138
  client.get_all_spaces(limit=1)
139
+ except Exception as e:
140
+ logger.exception(f"Failed to connect to find any Confluence space: {e}")
141
+ raise UserError(f"Failed to connect to find any Confluence space: {e}")
142
+
133
143
  logger.info("Connection to Confluence successful.")
134
- return True
135
- except Exception as e:
136
- logger.error(f"Failed to connect to Confluence: {e}", exc_info=True)
137
- raise SourceConnectionError(f"Failed to connect to Confluence: {e}")
144
+
145
+ # If specific spaces are provided, check if we can access them
146
+ errors = []
147
+
148
+ if self.index_config.spaces:
149
+ for space_key in self.index_config.spaces:
150
+ try:
151
+ client.get_space(space_key)
152
+ except Exception as e:
153
+ logger.exception(f"Failed to connect to Confluence: {e}")
154
+ errors.append(f"Failed to connect to '{space_key}' space, cause: '{e}'")
155
+
156
+ if errors:
157
+ raise UserError("\n".join(errors))
158
+
159
+ return True
138
160
 
139
161
  def _get_space_ids_and_keys(self) -> List[Tuple[str, int]]:
140
162
  """
@@ -406,7 +428,7 @@ class ConfluenceDownloader(Downloader):
406
428
  expand="history.lastUpdated,version,body.view",
407
429
  )
408
430
  except Exception as e:
409
- logger.error(f"Failed to retrieve page with ID {doc_id}: {e}", exc_info=True)
431
+ logger.exception(f"Failed to retrieve page with ID {doc_id}: {e}")
410
432
  raise SourceConnectionError(f"Failed to retrieve page with ID {doc_id}: {e}")
411
433
 
412
434
  if not page:
@@ -196,9 +196,10 @@ class DatabricksVolumesUploader(Uploader, ABC):
196
196
  connection_config: DatabricksVolumesConnectionConfig
197
197
 
198
198
  def get_output_path(self, file_data: FileData) -> str:
199
- if file_data.source_identifiers.fullpath:
199
+ if file_data.source_identifiers.relative_path:
200
200
  return os.path.join(
201
- self.upload_config.path, f"{file_data.source_identifiers.fullpath}.json"
201
+ self.upload_config.path,
202
+ f"{file_data.source_identifiers.relative_path.lstrip('/')}.json",
202
203
  )
203
204
  else:
204
205
  return os.path.join(
@@ -343,9 +343,10 @@ class FsspecUploader(Uploader):
343
343
  raise self.wrap_error(e=e)
344
344
 
345
345
  def get_upload_path(self, file_data: FileData) -> Path:
346
- upload_path = Path(
347
- self.upload_config.path_without_protocol
348
- ) / file_data.source_identifiers.fullpath.lstrip("/")
346
+ upload_path = (
347
+ Path(self.upload_config.path_without_protocol)
348
+ / file_data.source_identifiers.relative_path.lstrip("/")
349
+ )
349
350
  updated_upload_path = upload_path.parent / f"{upload_path.name}.json"
350
351
  return updated_upload_path
351
352
 
@@ -369,15 +369,14 @@ class OnedriveUploader(Uploader):
369
369
 
370
370
  # Use the remote_url from upload_config as the base destination folder
371
371
  base_destination_folder = self.upload_config.url
372
-
373
- # Use the file's full path to maintain directory structure, if needed
374
- if file_data.source_identifiers and file_data.source_identifiers.fullpath:
375
- # Combine the base destination folder with the file's full path
372
+ # Use the file's relative path to maintain directory structure, if needed
373
+ if file_data.source_identifiers and file_data.source_identifiers.relative_path:
374
+ # Combine the base destination folder with the file's relative path
376
375
  destination_path = Path(base_destination_folder) / Path(
377
- f"{file_data.source_identifiers.fullpath}.json"
376
+ f"{file_data.source_identifiers.relative_path}.json"
378
377
  )
379
378
  else:
380
- # If no full path is provided, upload directly to the base destination folder
379
+ # If no relative path is provided, upload directly to the base destination folder
381
380
  destination_path = Path(base_destination_folder) / f"{path.name}.json"
382
381
 
383
382
  destination_folder = destination_path.parent
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.33
3
+ Version: 1.0.35
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=moV2VeZrrB_QVKOvny6NjEoowwTiGToZWfDpKig5QOQ,43
2
+ unstructured_ingest/__version__.py,sha256=b3qTfBCIVt9b6BKAiKwjS-S1JYzG3JrNJz133p_CIH8,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -66,7 +66,7 @@ unstructured_ingest/processes/connectors/airtable.py,sha256=smx5qBSUKwM8V6Xcc7ik
66
66
  unstructured_ingest/processes/connectors/astradb.py,sha256=Ob9wQgDxa6BXDPZBOqooNKQgvjIZcMwIe4fW3VlI7h8,18929
67
67
  unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE2hGFfjGc_jNFzlUwiRlCtIkuu7tmnk,11524
68
68
  unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
69
- unstructured_ingest/processes/connectors/confluence.py,sha256=1oT4A83jSOWR8u8kldHImOBqSLxctdlsR-AZpzJfO9w,21098
69
+ unstructured_ingest/processes/connectors/confluence.py,sha256=C62LVwZYk7H8RfiPb0mbxig2osW5u7KvHIlz4qOJU-0,21954
70
70
  unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
71
71
  unstructured_ingest/processes/connectors/delta_table.py,sha256=2DFox_Vzoopt_D3Jy3rCjrrTGMutG2INIrwCeoIohRY,7340
72
72
  unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMkcd8lcLJC0uqbo4izjdZ3rU,5294
@@ -79,7 +79,7 @@ unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9
79
79
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
80
80
  unstructured_ingest/processes/connectors/mongodb.py,sha256=1g_5bfbS6lah3nsOXqLAanR3zNYJ47_Njw_uV-uj3_U,14324
81
81
  unstructured_ingest/processes/connectors/neo4j.py,sha256=ztxvI9KY8RF5kYUuMGSzzN5mz7Fu_4Ai9P7dqCpJLc0,20267
82
- unstructured_ingest/processes/connectors/onedrive.py,sha256=k0bhQCCSIgmHAk3lQd4CMA3dc4fPAjegNlLxlDWGowc,19284
82
+ unstructured_ingest/processes/connectors/onedrive.py,sha256=JIADpc31PI9Yzr0raF6bSqzes2jhfcniUzew1aKVWeI,19305
83
83
  unstructured_ingest/processes/connectors/outlook.py,sha256=zHM5frO7CqQG0-KcTyX49aZeSlsvVrl8kh_lR_ESgQw,9275
84
84
  unstructured_ingest/processes/connectors/pinecone.py,sha256=pSREUNsQqel6q1EFZsFWelg-uZgGubQY5m_6nVnBFKs,15090
85
85
  unstructured_ingest/processes/connectors/redisdb.py,sha256=rTihbfv0Mlk1eo5Izn-JXRu5Ad5C-KD58nSqeKsaZJ8,8024
@@ -92,7 +92,7 @@ unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-
92
92
  unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=8a9HTcRWA6IuswSD632b_uZSO6Dax_0rUYnflqktcek,226
93
93
  unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
94
94
  unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
95
- unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=fZeXRozTUM3JeZlmsxhn_glqRhxr8CGG-8I8QRhRcP8,8232
95
+ unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=yT5JFbVzAEOJsKjfGH8KG3eQfKaTNFEsg_FVDPVK7Xs,8271
96
96
  unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6aRTLSdc4GChCL4mz2b-IanderW8j1IqezX6YA,2958
97
97
  unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
98
98
  unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
@@ -109,7 +109,7 @@ unstructured_ingest/processes/connectors/fsspec/__init__.py,sha256=3HTdw4L4mdN4W
109
109
  unstructured_ingest/processes/connectors/fsspec/azure.py,sha256=31VNiG5YnXfhrFX7QJ2O1ubeWHxbe1sYVIztefbscAQ,7148
110
110
  unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4ZpI1fKTsJuIDfXuAzx_a4FzxG4,5873
111
111
  unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
112
- unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=4K8Q2D_6_HCqTVM3HBJv3SNz9gjbQhk44nzeSheDpzA,14462
112
+ unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=d_ig69_tuSWczwPxzZue1xTYMYqYqUe-dg1jMEjC8M0,14481
113
113
  unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
114
114
  unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=2ZV6b2E2pIsf_ab1Lty74FwpMnJZhpQUdamPgpwcKsQ,7141
115
115
  unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
231
231
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
232
232
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
233
233
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
234
- unstructured_ingest-1.0.33.dist-info/METADATA,sha256=d8F0hFb3s-aLloV1TGFLDHRa8CHwuTduHFS1neEHu6s,8747
235
- unstructured_ingest-1.0.33.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
- unstructured_ingest-1.0.33.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
- unstructured_ingest-1.0.33.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
- unstructured_ingest-1.0.33.dist-info/RECORD,,
234
+ unstructured_ingest-1.0.35.dist-info/METADATA,sha256=rpeaT-RpY6IFgDR7tIIxylJL1t-geRuhk3QKQq6JSDY,8747
235
+ unstructured_ingest-1.0.35.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ unstructured_ingest-1.0.35.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
+ unstructured_ingest-1.0.35.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
+ unstructured_ingest-1.0.35.dist-info/RECORD,,