unstructured-ingest: unstructured_ingest-1.1.0-py3-none-any.whl → unstructured_ingest-1.1.3-py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "1.1.0" # pragma: no cover
1
+ __version__ = "1.1.3" # pragma: no cover
@@ -48,7 +48,8 @@ class DeltaTableAccessConfig(AccessConfig):
48
48
 
49
49
  class DeltaTableConnectionConfig(ConnectionConfig):
50
50
  access_config: Secret[DeltaTableAccessConfig] = Field(
51
- default=Secret(DeltaTableAccessConfig()), validate_default=True
51
+ default_factory=lambda: Secret[DeltaTableAccessConfig](DeltaTableAccessConfig()),
52
+ validate_default=True,
52
53
  )
53
54
  aws_region: Optional[str] = Field(default=None, description="AWS Region")
54
55
  table_uri: str = Field(
@@ -19,7 +19,6 @@ from unstructured_ingest.error import (
19
19
  DestinationConnectionError,
20
20
  SourceConnectionError,
21
21
  SourceConnectionNetworkError,
22
- WriteError,
23
22
  )
24
23
  from unstructured_ingest.interfaces import (
25
24
  AccessConfig,
@@ -336,6 +335,8 @@ class ElasticsearchUploadStager(UploadStager):
336
335
 
337
336
  def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
338
337
  data = element_dict.copy()
338
+ # when _op_type is not specified, it defaults to "index":
339
+ # Overwrites if exists, creates if not.
339
340
  resp = {
340
341
  "_index": self.upload_stager_config.index_name,
341
342
  "_id": get_enhanced_element_id(element_dict=data, file_data=file_data),
@@ -397,23 +398,6 @@ class ElasticsearchUploader(Uploader):
397
398
 
398
399
  return parallel_bulk
399
400
 
400
- def delete_by_record_id(self, client, file_data: FileData) -> None:
401
- logger.debug(
402
- f"deleting any content with metadata {RECORD_ID_LABEL}={file_data.identifier} "
403
- f"from {self.upload_config.index_name} index"
404
- )
405
- delete_resp = client.delete_by_query(
406
- index=self.upload_config.index_name,
407
- body={"query": {"match": {self.upload_config.record_id_key: file_data.identifier}}},
408
- )
409
- logger.info(
410
- "deleted {} records from index {}".format(
411
- delete_resp["deleted"], self.upload_config.index_name
412
- )
413
- )
414
- if failures := delete_resp.get("failures"):
415
- raise WriteError(f"failed to delete records: {failures}")
416
-
417
401
  @requires_dependencies(["elasticsearch"], extras="elasticsearch")
418
402
  def run_data(self, data: list[dict], file_data: FileData, **kwargs: Any) -> None: # noqa: E501
419
403
  from elasticsearch.helpers.errors import BulkIndexError
@@ -429,7 +413,6 @@ class ElasticsearchUploader(Uploader):
429
413
  )
430
414
 
431
415
  with self.connection_config.get_client() as client:
432
- self.delete_by_record_id(client=client, file_data=file_data)
433
416
  if not client.indices.exists(index=self.upload_config.index_name):
434
417
  logger.warning(
435
418
  f"{(self.__class__.__name__).replace('Uploader', '')} index does not exist: "
@@ -446,6 +429,10 @@ class ElasticsearchUploader(Uploader):
446
429
  thread_count=self.upload_config.num_threads,
447
430
  )
448
431
  collections.deque(iterator, maxlen=0)
432
+ logger.info(
433
+ f"uploaded batch of {len(batch)} elements to index "
434
+ f"{self.upload_config.index_name}"
435
+ )
449
436
  except BulkIndexError as e:
450
437
  sanitized_errors = [
451
438
  self._sanitize_bulk_index_error(error) for error in e.errors
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.1.0
3
+ Version: 1.1.3
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=OTJtt59bB59UuRwC7CjPgJNmkdDC7RUC5Ukrfd-P-CE,42
2
+ unstructured_ingest/__version__.py,sha256=t-wmxuTrwUNkiwss9snVysVtVPGHyGwXuWW2QDRpdec,42
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -68,7 +68,7 @@ unstructured_ingest/processes/connectors/azure_ai_search.py,sha256=szhSRXzUHk0DE
68
68
  unstructured_ingest/processes/connectors/chroma.py,sha256=q5_Fu4xb6_W_NyrPxVa3-jVwZLqVdlBNlR4dFvbd7l0,7235
69
69
  unstructured_ingest/processes/connectors/confluence.py,sha256=aA2B_FPdAjlVAJtmMldYu6lld2sR-6JL5tWh7yItiwg,22828
70
70
  unstructured_ingest/processes/connectors/couchbase.py,sha256=KCHoYDNya9B05NIB5D78zXoizFyfpJRepcYBe1nLSOs,12298
71
- unstructured_ingest/processes/connectors/delta_table.py,sha256=Y0P-4knPBc7Q8QwlvlDe6ksIKppNY4dBZhC1vwGARi0,12661
71
+ unstructured_ingest/processes/connectors/delta_table.py,sha256=xL2W0Ue9cW938d1k_8q4VzRWtX1srzu21WOwzhWfG1o,12710
72
72
  unstructured_ingest/processes/connectors/discord.py,sha256=CD-SBECMdr3pnmqbPvBMyPU2cBroXUhyW6F7L3laP6A,5348
73
73
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
74
74
  unstructured_ingest/processes/connectors/gitlab.py,sha256=Fdq6_lk-By1JDmLGVjoKJkaHESiKTZsbvoHhMsljlE0,10114
@@ -103,7 +103,7 @@ unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpee
103
103
  unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
104
104
  unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=Atr2MjJQGFGWh5aeiQsLpUbFw-aCZH-ABI1LprDh5VI,4727
105
105
  unstructured_ingest/processes/connectors/elasticsearch/__init__.py,sha256=M8mmBWoP6J5R3hxg6BQUMexYlTUxUxdBoIcjUop8yt8,826
106
- unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=iCC4AP5s8YYa8sMldTFcHp9sfUK1LdQTD0oqXnvklwM,19305
106
+ unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=2DZCluXR5IamwmrYmlaXTFI6g-q3y6uatuK2BDIlDj0,18773
107
107
  unstructured_ingest/processes/connectors/elasticsearch/opensearch.py,sha256=wggHvw8h-X0-3WPNxj9rt2xkrE7Pv7CV0B0KzTMzBB4,6944
108
108
  unstructured_ingest/processes/connectors/fsspec/__init__.py,sha256=3HTdw4L4mdN4W8UX0jQbMxBg0ZfITPbEXU7Bwdo1BfI,1843
109
109
  unstructured_ingest/processes/connectors/fsspec/azure.py,sha256=31VNiG5YnXfhrFX7QJ2O1ubeWHxbe1sYVIztefbscAQ,7148
@@ -235,8 +235,8 @@ unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3r
235
235
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
236
236
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
237
237
  unstructured_ingest/utils/tls.py,sha256=Ra8Mii1F4VqErRreg76PBI0eAqPBC009l0sSHa8FdnA,448
238
- unstructured_ingest-1.1.0.dist-info/METADATA,sha256=tJonV6SbQB5XL3BeyL8coDFhzzChMKGuSPQWQ3aoOdE,8875
239
- unstructured_ingest-1.1.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
- unstructured_ingest-1.1.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
- unstructured_ingest-1.1.0.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
- unstructured_ingest-1.1.0.dist-info/RECORD,,
238
+ unstructured_ingest-1.1.3.dist-info/METADATA,sha256=Ztyvq_GpPKsmIMN5cCvXL0ppQdO4HIXtugh8rXuNqGY,8875
239
+ unstructured_ingest-1.1.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
240
+ unstructured_ingest-1.1.3.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
241
+ unstructured_ingest-1.1.3.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
242
+ unstructured_ingest-1.1.3.dist-info/RECORD,,