unstructured-ingest 0.5.25__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (106) hide show
  1. test/integration/connectors/databricks/test_volumes_native.py +1 -1
  2. test/integration/connectors/duckdb/test_duckdb.py +1 -1
  3. test/integration/connectors/duckdb/test_motherduck.py +1 -1
  4. test/integration/connectors/elasticsearch/test_elasticsearch.py +1 -1
  5. test/integration/connectors/elasticsearch/test_opensearch.py +1 -1
  6. test/integration/connectors/sql/test_databricks_delta_tables.py +1 -1
  7. test/integration/connectors/sql/test_postgres.py +1 -1
  8. test/integration/connectors/sql/test_singlestore.py +1 -1
  9. test/integration/connectors/sql/test_snowflake.py +1 -1
  10. test/integration/connectors/sql/test_sqlite.py +1 -1
  11. test/integration/connectors/test_astradb.py +1 -1
  12. test/integration/connectors/test_azure_ai_search.py +1 -1
  13. test/integration/connectors/test_chroma.py +1 -1
  14. test/integration/connectors/test_delta_table.py +1 -1
  15. test/integration/connectors/test_lancedb.py +1 -1
  16. test/integration/connectors/test_milvus.py +1 -1
  17. test/integration/connectors/test_mongodb.py +1 -1
  18. test/integration/connectors/test_neo4j.py +5 -5
  19. test/integration/connectors/test_onedrive.py +1 -1
  20. test/integration/connectors/test_pinecone.py +1 -1
  21. test/integration/connectors/test_qdrant.py +1 -1
  22. test/integration/connectors/test_redis.py +1 -1
  23. test/integration/connectors/test_s3.py +1 -1
  24. test/integration/connectors/test_vectara.py +1 -1
  25. test/integration/connectors/utils/validation/destination.py +2 -1
  26. test/integration/connectors/utils/validation/source.py +2 -1
  27. test/integration/connectors/weaviate/test_local.py +1 -1
  28. test/unit/test_html.py +1 -1
  29. test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +1 -1
  30. test/unit/v2/connectors/motherduck/test_base.py +1 -2
  31. test/unit/v2/connectors/sql/test_sql.py +1 -1
  32. unstructured_ingest/__version__.py +1 -1
  33. unstructured_ingest/utils/html.py +2 -1
  34. unstructured_ingest/v2/interfaces/__init__.py +0 -13
  35. unstructured_ingest/v2/interfaces/downloader.py +1 -1
  36. unstructured_ingest/v2/interfaces/indexer.py +1 -1
  37. unstructured_ingest/v2/interfaces/upload_stager.py +2 -2
  38. unstructured_ingest/v2/interfaces/uploader.py +2 -3
  39. unstructured_ingest/v2/pipeline/steps/chunk.py +1 -2
  40. unstructured_ingest/v2/pipeline/steps/download.py +2 -3
  41. unstructured_ingest/v2/pipeline/steps/embed.py +1 -2
  42. unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
  43. unstructured_ingest/v2/pipeline/steps/partition.py +1 -2
  44. unstructured_ingest/v2/pipeline/steps/stage.py +2 -2
  45. unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
  46. unstructured_ingest/v2/pipeline/steps/upload.py +2 -2
  47. unstructured_ingest/v2/processes/chunker.py +5 -2
  48. unstructured_ingest/v2/processes/connectors/airtable.py +1 -2
  49. unstructured_ingest/v2/processes/connectors/astradb.py +7 -5
  50. unstructured_ingest/v2/processes/connectors/azure_ai_search.py +1 -1
  51. unstructured_ingest/v2/processes/connectors/chroma.py +1 -1
  52. unstructured_ingest/v2/processes/connectors/confluence.py +5 -3
  53. unstructured_ingest/v2/processes/connectors/couchbase.py +7 -5
  54. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -3
  55. unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +2 -1
  56. unstructured_ingest/v2/processes/connectors/delta_table.py +1 -1
  57. unstructured_ingest/v2/processes/connectors/discord.py +5 -3
  58. unstructured_ingest/v2/processes/connectors/duckdb/base.py +2 -1
  59. unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +1 -1
  60. unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +1 -1
  61. unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +7 -5
  62. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +1 -1
  63. unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -1
  64. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +1 -1
  65. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +5 -3
  66. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +1 -1
  67. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +3 -3
  68. unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +1 -1
  69. unstructured_ingest/v2/processes/connectors/gitlab.py +5 -3
  70. unstructured_ingest/v2/processes/connectors/google_drive.py +5 -3
  71. unstructured_ingest/v2/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +1 -1
  72. unstructured_ingest/v2/processes/connectors/jira.py +5 -3
  73. unstructured_ingest/v2/processes/connectors/kafka/kafka.py +5 -3
  74. unstructured_ingest/v2/processes/connectors/kdbai.py +1 -1
  75. unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -4
  76. unstructured_ingest/v2/processes/connectors/local.py +5 -3
  77. unstructured_ingest/v2/processes/connectors/milvus.py +1 -1
  78. unstructured_ingest/v2/processes/connectors/mongodb.py +7 -5
  79. unstructured_ingest/v2/processes/connectors/neo4j.py +1 -1
  80. unstructured_ingest/v2/processes/connectors/notion/connector.py +5 -3
  81. unstructured_ingest/v2/processes/connectors/onedrive.py +5 -3
  82. unstructured_ingest/v2/processes/connectors/outlook.py +5 -2
  83. unstructured_ingest/v2/processes/connectors/pinecone.py +1 -1
  84. unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +1 -1
  85. unstructured_ingest/v2/processes/connectors/redisdb.py +1 -1
  86. unstructured_ingest/v2/processes/connectors/salesforce.py +5 -3
  87. unstructured_ingest/v2/processes/connectors/sharepoint.py +3 -3
  88. unstructured_ingest/v2/processes/connectors/slack.py +2 -2
  89. unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +1 -1
  90. unstructured_ingest/v2/processes/connectors/sql/snowflake.py +1 -1
  91. unstructured_ingest/v2/processes/connectors/sql/sql.py +7 -5
  92. unstructured_ingest/v2/processes/connectors/sql/vastdb.py +3 -3
  93. unstructured_ingest/v2/processes/connectors/vectara.py +1 -1
  94. unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +1 -1
  95. unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +5 -3
  96. unstructured_ingest/v2/processes/filter.py +1 -1
  97. unstructured_ingest/v2/processes/uncompress.py +1 -1
  98. unstructured_ingest/v2/processes/utils/blob_storage.py +2 -1
  99. unstructured_ingest/v2/utils.py +1 -1
  100. {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/METADATA +16 -16
  101. {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/RECORD +105 -106
  102. unstructured_ingest/v2/interfaces/file_data.py +0 -13
  103. {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/LICENSE.md +0 -0
  104. {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/WHEEL +0 -0
  105. {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/entry_points.txt +0 -0
  106. {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/top_level.txt +0 -0
@@ -13,7 +13,6 @@ from unstructured_ingest.v2.constants import RECORD_ID_LABEL
13
13
  from unstructured_ingest.v2.interfaces import (
14
14
  AccessConfig,
15
15
  ConnectionConfig,
16
- FileData,
17
16
  Uploader,
18
17
  UploaderConfig,
19
18
  UploadStager,
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.logger import logger
23
22
  from unstructured_ingest.v2.processes.connector_registry import (
24
23
  DestinationRegistryEntry,
25
24
  )
25
+ from unstructured_ingest.v2.types.file_data import FileData
26
26
 
27
27
  if TYPE_CHECKING:
28
28
  from pymilvus import MilvusClient
@@ -13,17 +13,12 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
13
13
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
14
14
  from unstructured_ingest.v2.interfaces import (
15
15
  AccessConfig,
16
- BatchFileData,
17
- BatchItem,
18
16
  ConnectionConfig,
19
17
  Downloader,
20
18
  DownloaderConfig,
21
19
  DownloadResponse,
22
- FileData,
23
- FileDataSourceMetadata,
24
20
  Indexer,
25
21
  IndexerConfig,
26
- SourceIdentifiers,
27
22
  Uploader,
28
23
  UploaderConfig,
29
24
  download_responses,
@@ -33,6 +28,13 @@ from unstructured_ingest.v2.processes.connector_registry import (
33
28
  DestinationRegistryEntry,
34
29
  SourceRegistryEntry,
35
30
  )
31
+ from unstructured_ingest.v2.types.file_data import (
32
+ BatchFileData,
33
+ BatchItem,
34
+ FileData,
35
+ FileDataSourceMetadata,
36
+ SourceIdentifiers,
37
+ )
36
38
 
37
39
  if TYPE_CHECKING:
38
40
  from pymongo import MongoClient
@@ -18,7 +18,6 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
18
18
  from unstructured_ingest.v2.interfaces import (
19
19
  AccessConfig,
20
20
  ConnectionConfig,
21
- FileData,
22
21
  Uploader,
23
22
  UploaderConfig,
24
23
  UploadStager,
@@ -28,6 +27,7 @@ from unstructured_ingest.v2.processes.connector_registry import (
28
27
  DestinationRegistryEntry,
29
28
  )
30
29
  from unstructured_ingest.v2.processes.connectors.utils import format_and_truncate_orig_elements
30
+ from unstructured_ingest.v2.types.file_data import FileData
31
31
 
32
32
  SimilarityFunction = Literal["cosine"]
33
33
 
@@ -12,14 +12,16 @@ from unstructured_ingest.v2.interfaces import (
12
12
  Downloader,
13
13
  DownloaderConfig,
14
14
  DownloadResponse,
15
- FileData,
16
- FileDataSourceMetadata,
17
15
  Indexer,
18
16
  IndexerConfig,
19
- SourceIdentifiers,
20
17
  )
21
18
  from unstructured_ingest.v2.logger import logger
22
19
  from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
20
+ from unstructured_ingest.v2.types.file_data import (
21
+ FileData,
22
+ FileDataSourceMetadata,
23
+ SourceIdentifiers,
24
+ )
23
25
 
24
26
  if TYPE_CHECKING:
25
27
  from unstructured_ingest.v2.processes.connectors.notion.client import Client
@@ -22,11 +22,8 @@ from unstructured_ingest.v2.interfaces import (
22
22
  Downloader,
23
23
  DownloaderConfig,
24
24
  DownloadResponse,
25
- FileData,
26
- FileDataSourceMetadata,
27
25
  Indexer,
28
26
  IndexerConfig,
29
- SourceIdentifiers,
30
27
  Uploader,
31
28
  UploaderConfig,
32
29
  )
@@ -39,6 +36,11 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
39
36
  BlobStoreUploadStager,
40
37
  BlobStoreUploadStagerConfig,
41
38
  )
39
+ from unstructured_ingest.v2.types.file_data import (
40
+ FileData,
41
+ FileDataSourceMetadata,
42
+ SourceIdentifiers,
43
+ )
42
44
 
43
45
  if TYPE_CHECKING:
44
46
  from office365.graph_client import GraphClient
@@ -16,12 +16,15 @@ from unstructured_ingest.v2.interfaces import (
16
16
  Downloader,
17
17
  DownloaderConfig,
18
18
  DownloadResponse,
19
- FileData,
20
19
  Indexer,
21
20
  IndexerConfig,
22
21
  )
23
- from unstructured_ingest.v2.interfaces.file_data import FileDataSourceMetadata, SourceIdentifiers
24
22
  from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
23
+ from unstructured_ingest.v2.types.file_data import (
24
+ FileData,
25
+ FileDataSourceMetadata,
26
+ SourceIdentifiers,
27
+ )
25
28
 
26
29
  MAX_EMAILS_PER_FOLDER = 1_000_000 # Maximum number of emails per folder
27
30
 
@@ -13,7 +13,6 @@ from unstructured_ingest.v2.errors import UserError
13
13
  from unstructured_ingest.v2.interfaces import (
14
14
  AccessConfig,
15
15
  ConnectionConfig,
16
- FileData,
17
16
  UploaderConfig,
18
17
  UploadStager,
19
18
  UploadStagerConfig,
@@ -21,6 +20,7 @@ from unstructured_ingest.v2.interfaces import (
21
20
  )
22
21
  from unstructured_ingest.v2.logger import logger
23
22
  from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
23
+ from unstructured_ingest.v2.types.file_data import FileData
24
24
  from unstructured_ingest.v2.utils import get_enhanced_element_id
25
25
 
26
26
  if TYPE_CHECKING:
@@ -13,13 +13,13 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
13
13
  from unstructured_ingest.v2.interfaces import (
14
14
  AccessConfig,
15
15
  ConnectionConfig,
16
- FileData,
17
16
  Uploader,
18
17
  UploaderConfig,
19
18
  UploadStager,
20
19
  UploadStagerConfig,
21
20
  )
22
21
  from unstructured_ingest.v2.logger import logger
22
+ from unstructured_ingest.v2.types.file_data import FileData
23
23
  from unstructured_ingest.v2.utils import get_enhanced_element_id
24
24
 
25
25
  if TYPE_CHECKING:
@@ -11,12 +11,12 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
11
11
  from unstructured_ingest.v2.interfaces import (
12
12
  AccessConfig,
13
13
  ConnectionConfig,
14
- FileData,
15
14
  Uploader,
16
15
  UploaderConfig,
17
16
  )
18
17
  from unstructured_ingest.v2.logger import logger
19
18
  from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
19
+ from unstructured_ingest.v2.types.file_data import FileData
20
20
 
21
21
  if TYPE_CHECKING:
22
22
  from redis.asyncio import Redis
@@ -28,16 +28,18 @@ from unstructured_ingest.v2.interfaces import (
28
28
  Downloader,
29
29
  DownloaderConfig,
30
30
  DownloadResponse,
31
- FileData,
32
- FileDataSourceMetadata,
33
31
  Indexer,
34
32
  IndexerConfig,
35
- SourceIdentifiers,
36
33
  )
37
34
  from unstructured_ingest.v2.logger import logger
38
35
  from unstructured_ingest.v2.processes.connector_registry import (
39
36
  SourceRegistryEntry,
40
37
  )
38
+ from unstructured_ingest.v2.types.file_data import (
39
+ FileData,
40
+ FileDataSourceMetadata,
41
+ SourceIdentifiers,
42
+ )
41
43
 
42
44
 
43
45
  class MissingCategoryError(Exception):
@@ -11,9 +11,6 @@ from unstructured_ingest.error import (
11
11
  SourceConnectionNetworkError,
12
12
  )
13
13
  from unstructured_ingest.utils.dep_check import requires_dependencies
14
- from unstructured_ingest.v2.interfaces import (
15
- FileData,
16
- )
17
14
  from unstructured_ingest.v2.logger import logger
18
15
  from unstructured_ingest.v2.processes.connector_registry import (
19
16
  SourceRegistryEntry,
@@ -26,6 +23,9 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
26
23
  OnedriveIndexer,
27
24
  OnedriveIndexerConfig,
28
25
  )
26
+ from unstructured_ingest.v2.types.file_data import (
27
+ FileData,
28
+ )
29
29
 
30
30
  if TYPE_CHECKING:
31
31
  from office365.onedrive.driveitems.driveItem import DriveItem
@@ -20,12 +20,12 @@ from unstructured_ingest.v2.interfaces import (
20
20
  Indexer,
21
21
  IndexerConfig,
22
22
  )
23
- from unstructured_ingest.v2.interfaces.file_data import (
23
+ from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
24
+ from unstructured_ingest.v2.types.file_data import (
24
25
  FileData,
25
26
  FileDataSourceMetadata,
26
27
  SourceIdentifiers,
27
28
  )
28
- from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
29
29
 
30
30
  if TYPE_CHECKING:
31
31
  from slack_sdk import WebClient
@@ -7,7 +7,6 @@ from pydantic import Field, Secret
7
7
 
8
8
  from unstructured_ingest.utils.data_prep import split_dataframe
9
9
  from unstructured_ingest.utils.dep_check import requires_dependencies
10
- from unstructured_ingest.v2.interfaces import FileData
11
10
  from unstructured_ingest.v2.logger import logger
12
11
  from unstructured_ingest.v2.processes.connector_registry import (
13
12
  DestinationRegistryEntry,
@@ -20,6 +19,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
20
19
  SQLUploadStager,
21
20
  SQLUploadStagerConfig,
22
21
  )
22
+ from unstructured_ingest.v2.types.file_data import FileData
23
23
 
24
24
  if TYPE_CHECKING:
25
25
  from databricks.sdk.core import oauth_service_principal
@@ -7,7 +7,6 @@ from pydantic import Field, Secret
7
7
 
8
8
  from unstructured_ingest.utils.data_prep import split_dataframe
9
9
  from unstructured_ingest.utils.dep_check import requires_dependencies
10
- from unstructured_ingest.v2.interfaces.file_data import FileData
11
10
  from unstructured_ingest.v2.logger import logger
12
11
  from unstructured_ingest.v2.processes.connector_registry import (
13
12
  DestinationRegistryEntry,
@@ -28,6 +27,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
28
27
  SQLUploadStagerConfig,
29
28
  parse_date_string,
30
29
  )
30
+ from unstructured_ingest.v2.types.file_data import FileData
31
31
 
32
32
  if TYPE_CHECKING:
33
33
  from pandas import DataFrame
@@ -16,17 +16,12 @@ from unstructured_ingest.utils.data_prep import get_data, get_data_df, split_dat
16
16
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
17
17
  from unstructured_ingest.v2.interfaces import (
18
18
  AccessConfig,
19
- BatchFileData,
20
- BatchItem,
21
19
  ConnectionConfig,
22
20
  Downloader,
23
21
  DownloaderConfig,
24
22
  DownloadResponse,
25
- FileData,
26
- FileDataSourceMetadata,
27
23
  Indexer,
28
24
  IndexerConfig,
29
- SourceIdentifiers,
30
25
  Uploader,
31
26
  UploaderConfig,
32
27
  UploadStager,
@@ -34,6 +29,13 @@ from unstructured_ingest.v2.interfaces import (
34
29
  download_responses,
35
30
  )
36
31
  from unstructured_ingest.v2.logger import logger
32
+ from unstructured_ingest.v2.types.file_data import (
33
+ BatchFileData,
34
+ BatchItem,
35
+ FileData,
36
+ FileDataSourceMetadata,
37
+ SourceIdentifiers,
38
+ )
37
39
  from unstructured_ingest.v2.utils import get_enhanced_element_id
38
40
 
39
41
  if TYPE_CHECKING:
@@ -8,9 +8,6 @@ from unstructured_ingest.error import DestinationConnectionError
8
8
  from unstructured_ingest.utils.data_prep import split_dataframe
9
9
  from unstructured_ingest.utils.dep_check import requires_dependencies
10
10
  from unstructured_ingest.v2.constants import RECORD_ID_LABEL
11
- from unstructured_ingest.v2.interfaces import (
12
- FileData,
13
- )
14
11
  from unstructured_ingest.v2.logger import logger
15
12
  from unstructured_ingest.v2.processes.connector_registry import (
16
13
  DestinationRegistryEntry,
@@ -29,6 +26,9 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
29
26
  SQLUploadStager,
30
27
  SQLUploadStagerConfig,
31
28
  )
29
+ from unstructured_ingest.v2.types.file_data import (
30
+ FileData,
31
+ )
32
32
  from unstructured_ingest.v2.utils import get_enhanced_element_id
33
33
 
34
34
  if TYPE_CHECKING:
@@ -14,7 +14,6 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
14
14
  from unstructured_ingest.v2.interfaces import (
15
15
  AccessConfig,
16
16
  ConnectionConfig,
17
- FileData,
18
17
  Uploader,
19
18
  UploaderConfig,
20
19
  UploadStager,
@@ -22,6 +21,7 @@ from unstructured_ingest.v2.interfaces import (
22
21
  )
23
22
  from unstructured_ingest.v2.logger import logger
24
23
  from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
24
+ from unstructured_ingest.v2.types.file_data import FileData
25
25
 
26
26
  BASE_URL = "https://api.vectara.io/v2"
27
27
 
@@ -16,13 +16,13 @@ from unstructured_ingest.v2.constants import RECORD_ID_LABEL
16
16
  from unstructured_ingest.v2.interfaces import (
17
17
  AccessConfig,
18
18
  ConnectionConfig,
19
- FileData,
20
19
  UploaderConfig,
21
20
  UploadStager,
22
21
  UploadStagerConfig,
23
22
  VectorDBUploader,
24
23
  )
25
24
  from unstructured_ingest.v2.logger import logger
25
+ from unstructured_ingest.v2.types.file_data import FileData
26
26
 
27
27
  if TYPE_CHECKING:
28
28
  from weaviate.classes.init import Timeout
@@ -16,14 +16,16 @@ from unstructured_ingest.v2.interfaces import (
16
16
  Downloader,
17
17
  DownloaderConfig,
18
18
  DownloadResponse,
19
- FileData,
20
- FileDataSourceMetadata,
21
19
  Indexer,
22
20
  IndexerConfig,
23
- SourceIdentifiers,
24
21
  )
25
22
  from unstructured_ingest.v2.logger import logger
26
23
  from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
24
+ from unstructured_ingest.v2.types.file_data import (
25
+ FileData,
26
+ FileDataSourceMetadata,
27
+ SourceIdentifiers,
28
+ )
27
29
 
28
30
  from .client import ZendeskArticle, ZendeskClient, ZendeskTicket
29
31
 
@@ -5,9 +5,9 @@ from typing import Any, Callable, Optional
5
5
 
6
6
  from pydantic import BaseModel, Field
7
7
 
8
- from unstructured_ingest.v2.interfaces import FileData
9
8
  from unstructured_ingest.v2.interfaces.process import BaseProcess
10
9
  from unstructured_ingest.v2.logger import logger
10
+ from unstructured_ingest.v2.types.file_data import FileData
11
11
 
12
12
 
13
13
  class FiltererConfig(BaseModel):
@@ -8,9 +8,9 @@ from uuid import NAMESPACE_DNS, uuid5
8
8
  from pydantic import BaseModel
9
9
 
10
10
  from unstructured_ingest.utils.compression import TAR_FILE_EXT, ZIP_FILE_EXT, uncompress_file
11
- from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
12
11
  from unstructured_ingest.v2.interfaces.process import BaseProcess
13
12
  from unstructured_ingest.v2.logger import logger
13
+ from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
14
14
 
15
15
 
16
16
  class UncompressConfig(BaseModel):
@@ -3,7 +3,8 @@ from pathlib import Path
3
3
  from typing import Any
4
4
 
5
5
  from unstructured_ingest.utils.data_prep import get_data, write_data
6
- from unstructured_ingest.v2.interfaces import FileData, UploadStager, UploadStagerConfig
6
+ from unstructured_ingest.v2.interfaces import UploadStager, UploadStagerConfig
7
+ from unstructured_ingest.v2.types.file_data import FileData
7
8
 
8
9
 
9
10
  class BlobStoreUploadStagerConfig(UploadStagerConfig):
@@ -8,7 +8,7 @@ from uuid import NAMESPACE_DNS, uuid5
8
8
  from pydantic import BaseModel
9
9
  from pydantic.types import _SecretBase
10
10
 
11
- from unstructured_ingest.v2.interfaces import FileData
11
+ from unstructured_ingest.v2.types.file_data import FileData
12
12
 
13
13
 
14
14
  def is_secret(value: Any) -> bool:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: unstructured-ingest
3
- Version: 0.5.25
3
+ Version: 0.6.1
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -22,11 +22,11 @@ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
22
22
  Requires-Python: >=3.9.0,<3.14
23
23
  Description-Content-Type: text/markdown
24
24
  License-File: LICENSE.md
25
+ Requires-Dist: click
26
+ Requires-Dist: tqdm
25
27
  Requires-Dist: opentelemetry-sdk
26
28
  Requires-Dist: python-dateutil
27
- Requires-Dist: click
28
29
  Requires-Dist: dataclasses_json
29
- Requires-Dist: tqdm
30
30
  Requires-Dist: pydantic>=2.7
31
31
  Requires-Dist: numpy
32
32
  Requires-Dist: pandas
@@ -117,8 +117,8 @@ Requires-Dist: bs4; extra == "biomed"
117
117
  Requires-Dist: numpy; extra == "biomed"
118
118
  Requires-Dist: pandas; extra == "biomed"
119
119
  Provides-Extra: box
120
- Requires-Dist: boxfs; extra == "box"
121
120
  Requires-Dist: fsspec; extra == "box"
121
+ Requires-Dist: boxfs; extra == "box"
122
122
  Requires-Dist: numpy; extra == "box"
123
123
  Requires-Dist: pandas; extra == "box"
124
124
  Provides-Extra: chroma
@@ -130,8 +130,8 @@ Requires-Dist: clarifai; extra == "clarifai"
130
130
  Requires-Dist: numpy; extra == "clarifai"
131
131
  Requires-Dist: pandas; extra == "clarifai"
132
132
  Provides-Extra: confluence
133
- Requires-Dist: atlassian-python-api; extra == "confluence"
134
133
  Requires-Dist: requests; extra == "confluence"
134
+ Requires-Dist: atlassian-python-api; extra == "confluence"
135
135
  Requires-Dist: numpy; extra == "confluence"
136
136
  Requires-Dist: pandas; extra == "confluence"
137
137
  Provides-Extra: couchbase
@@ -185,10 +185,10 @@ Requires-Dist: urllib3; extra == "hubspot"
185
185
  Requires-Dist: numpy; extra == "hubspot"
186
186
  Requires-Dist: pandas; extra == "hubspot"
187
187
  Provides-Extra: ibm-watsonx-s3
188
- Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
188
+ Requires-Dist: httpx; extra == "ibm-watsonx-s3"
189
189
  Requires-Dist: pyiceberg; extra == "ibm-watsonx-s3"
190
+ Requires-Dist: tenacity; extra == "ibm-watsonx-s3"
190
191
  Requires-Dist: pyarrow; extra == "ibm-watsonx-s3"
191
- Requires-Dist: httpx; extra == "ibm-watsonx-s3"
192
192
  Requires-Dist: numpy; extra == "ibm-watsonx-s3"
193
193
  Requires-Dist: pandas; extra == "ibm-watsonx-s3"
194
194
  Provides-Extra: jira
@@ -222,16 +222,16 @@ Requires-Dist: neo4j-rust-ext; extra == "neo4j"
222
222
  Requires-Dist: numpy; extra == "neo4j"
223
223
  Requires-Dist: pandas; extra == "neo4j"
224
224
  Provides-Extra: notion
225
+ Requires-Dist: httpx; extra == "notion"
225
226
  Requires-Dist: notion-client; extra == "notion"
226
- Requires-Dist: backoff; extra == "notion"
227
227
  Requires-Dist: htmlBuilder; extra == "notion"
228
- Requires-Dist: httpx; extra == "notion"
228
+ Requires-Dist: backoff; extra == "notion"
229
229
  Requires-Dist: numpy; extra == "notion"
230
230
  Requires-Dist: pandas; extra == "notion"
231
231
  Provides-Extra: onedrive
232
- Requires-Dist: msal; extra == "onedrive"
233
232
  Requires-Dist: Office365-REST-Python-Client; extra == "onedrive"
234
233
  Requires-Dist: bs4; extra == "onedrive"
234
+ Requires-Dist: msal; extra == "onedrive"
235
235
  Requires-Dist: numpy; extra == "onedrive"
236
236
  Requires-Dist: pandas; extra == "onedrive"
237
237
  Provides-Extra: opensearch
@@ -239,8 +239,8 @@ Requires-Dist: opensearch-py; extra == "opensearch"
239
239
  Requires-Dist: numpy; extra == "opensearch"
240
240
  Requires-Dist: pandas; extra == "opensearch"
241
241
  Provides-Extra: outlook
242
- Requires-Dist: msal; extra == "outlook"
243
242
  Requires-Dist: Office365-REST-Python-Client; extra == "outlook"
243
+ Requires-Dist: msal; extra == "outlook"
244
244
  Requires-Dist: numpy; extra == "outlook"
245
245
  Requires-Dist: pandas; extra == "outlook"
246
246
  Provides-Extra: pinecone
@@ -264,13 +264,13 @@ Requires-Dist: redis; extra == "redis"
264
264
  Requires-Dist: numpy; extra == "redis"
265
265
  Requires-Dist: pandas; extra == "redis"
266
266
  Provides-Extra: s3
267
- Requires-Dist: fsspec; extra == "s3"
268
267
  Requires-Dist: s3fs; extra == "s3"
268
+ Requires-Dist: fsspec; extra == "s3"
269
269
  Requires-Dist: numpy; extra == "s3"
270
270
  Requires-Dist: pandas; extra == "s3"
271
271
  Provides-Extra: sharepoint
272
- Requires-Dist: msal; extra == "sharepoint"
273
272
  Requires-Dist: Office365-REST-Python-Client; extra == "sharepoint"
273
+ Requires-Dist: msal; extra == "sharepoint"
274
274
  Requires-Dist: numpy; extra == "sharepoint"
275
275
  Requires-Dist: pandas; extra == "sharepoint"
276
276
  Provides-Extra: salesforce
@@ -318,14 +318,14 @@ Requires-Dist: httpx; extra == "vectara"
318
318
  Requires-Dist: numpy; extra == "vectara"
319
319
  Requires-Dist: pandas; extra == "vectara"
320
320
  Provides-Extra: vastdb
321
- Requires-Dist: pyarrow; extra == "vastdb"
322
- Requires-Dist: ibis; extra == "vastdb"
323
321
  Requires-Dist: vastdb; extra == "vastdb"
322
+ Requires-Dist: ibis; extra == "vastdb"
323
+ Requires-Dist: pyarrow; extra == "vastdb"
324
324
  Requires-Dist: numpy; extra == "vastdb"
325
325
  Requires-Dist: pandas; extra == "vastdb"
326
326
  Provides-Extra: zendesk
327
- Requires-Dist: aiofiles; extra == "zendesk"
328
327
  Requires-Dist: httpx; extra == "zendesk"
328
+ Requires-Dist: aiofiles; extra == "zendesk"
329
329
  Requires-Dist: bs4; extra == "zendesk"
330
330
  Requires-Dist: numpy; extra == "zendesk"
331
331
  Requires-Dist: pandas; extra == "zendesk"