unstructured-ingest 0.5.25__py3-none-any.whl → 0.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/databricks/test_volumes_native.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +1 -1
- test/integration/connectors/duckdb/test_motherduck.py +1 -1
- test/integration/connectors/elasticsearch/test_elasticsearch.py +1 -1
- test/integration/connectors/elasticsearch/test_opensearch.py +1 -1
- test/integration/connectors/sql/test_databricks_delta_tables.py +1 -1
- test/integration/connectors/sql/test_postgres.py +1 -1
- test/integration/connectors/sql/test_singlestore.py +1 -1
- test/integration/connectors/sql/test_snowflake.py +1 -1
- test/integration/connectors/sql/test_sqlite.py +1 -1
- test/integration/connectors/test_astradb.py +1 -1
- test/integration/connectors/test_azure_ai_search.py +1 -1
- test/integration/connectors/test_chroma.py +1 -1
- test/integration/connectors/test_delta_table.py +1 -1
- test/integration/connectors/test_lancedb.py +1 -1
- test/integration/connectors/test_milvus.py +1 -1
- test/integration/connectors/test_mongodb.py +1 -1
- test/integration/connectors/test_neo4j.py +5 -5
- test/integration/connectors/test_onedrive.py +1 -1
- test/integration/connectors/test_pinecone.py +1 -1
- test/integration/connectors/test_qdrant.py +1 -1
- test/integration/connectors/test_redis.py +1 -1
- test/integration/connectors/test_s3.py +1 -1
- test/integration/connectors/test_vectara.py +1 -1
- test/integration/connectors/utils/validation/destination.py +2 -1
- test/integration/connectors/utils/validation/source.py +2 -1
- test/integration/connectors/weaviate/test_local.py +1 -1
- test/unit/test_html.py +1 -1
- test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +1 -1
- test/unit/v2/connectors/motherduck/test_base.py +1 -2
- test/unit/v2/connectors/sql/test_sql.py +1 -1
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/html.py +2 -1
- unstructured_ingest/v2/interfaces/__init__.py +0 -13
- unstructured_ingest/v2/interfaces/downloader.py +1 -1
- unstructured_ingest/v2/interfaces/indexer.py +1 -1
- unstructured_ingest/v2/interfaces/upload_stager.py +2 -2
- unstructured_ingest/v2/interfaces/uploader.py +2 -3
- unstructured_ingest/v2/pipeline/steps/chunk.py +1 -2
- unstructured_ingest/v2/pipeline/steps/download.py +2 -3
- unstructured_ingest/v2/pipeline/steps/embed.py +1 -2
- unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
- unstructured_ingest/v2/pipeline/steps/partition.py +1 -2
- unstructured_ingest/v2/pipeline/steps/stage.py +2 -2
- unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
- unstructured_ingest/v2/pipeline/steps/upload.py +2 -2
- unstructured_ingest/v2/processes/chunker.py +5 -2
- unstructured_ingest/v2/processes/connectors/airtable.py +1 -2
- unstructured_ingest/v2/processes/connectors/astradb.py +7 -5
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +1 -1
- unstructured_ingest/v2/processes/connectors/chroma.py +1 -1
- unstructured_ingest/v2/processes/connectors/confluence.py +5 -3
- unstructured_ingest/v2/processes/connectors/couchbase.py +7 -5
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -3
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +2 -1
- unstructured_ingest/v2/processes/connectors/delta_table.py +1 -1
- unstructured_ingest/v2/processes/connectors/discord.py +5 -3
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +2 -1
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +1 -1
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +1 -1
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +7 -5
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +5 -3
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +3 -3
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +1 -1
- unstructured_ingest/v2/processes/connectors/gitlab.py +5 -3
- unstructured_ingest/v2/processes/connectors/google_drive.py +5 -3
- unstructured_ingest/v2/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +1 -1
- unstructured_ingest/v2/processes/connectors/jira.py +5 -3
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +5 -3
- unstructured_ingest/v2/processes/connectors/kdbai.py +1 -1
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -4
- unstructured_ingest/v2/processes/connectors/local.py +5 -3
- unstructured_ingest/v2/processes/connectors/milvus.py +1 -1
- unstructured_ingest/v2/processes/connectors/mongodb.py +7 -5
- unstructured_ingest/v2/processes/connectors/neo4j.py +1 -1
- unstructured_ingest/v2/processes/connectors/notion/connector.py +5 -3
- unstructured_ingest/v2/processes/connectors/onedrive.py +5 -3
- unstructured_ingest/v2/processes/connectors/outlook.py +5 -2
- unstructured_ingest/v2/processes/connectors/pinecone.py +1 -1
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +1 -1
- unstructured_ingest/v2/processes/connectors/redisdb.py +1 -1
- unstructured_ingest/v2/processes/connectors/salesforce.py +5 -3
- unstructured_ingest/v2/processes/connectors/sharepoint.py +3 -3
- unstructured_ingest/v2/processes/connectors/slack.py +2 -2
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +1 -1
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +1 -1
- unstructured_ingest/v2/processes/connectors/sql/sql.py +7 -5
- unstructured_ingest/v2/processes/connectors/sql/vastdb.py +3 -3
- unstructured_ingest/v2/processes/connectors/vectara.py +1 -1
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +1 -1
- unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +5 -3
- unstructured_ingest/v2/processes/filter.py +1 -1
- unstructured_ingest/v2/processes/uncompress.py +1 -1
- unstructured_ingest/v2/processes/utils/blob_storage.py +2 -1
- unstructured_ingest/v2/utils.py +1 -1
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/METADATA +16 -16
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/RECORD +105 -106
- unstructured_ingest/v2/interfaces/file_data.py +0 -13
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.1.dist-info}/top_level.txt +0 -0
|
@@ -5,11 +5,10 @@ from pathlib import Path
|
|
|
5
5
|
from typing import Callable, Optional, TypedDict
|
|
6
6
|
|
|
7
7
|
from unstructured_ingest.utils.data_prep import write_data
|
|
8
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
9
|
-
from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
|
|
10
8
|
from unstructured_ingest.v2.logger import logger
|
|
11
9
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
12
10
|
from unstructured_ingest.v2.processes.partitioner import Partitioner
|
|
11
|
+
from unstructured_ingest.v2.types.file_data import FileData, file_data_from_file
|
|
13
12
|
from unstructured_ingest.v2.utils import serialize_base_model_json
|
|
14
13
|
|
|
15
14
|
STEP_ID = "partition"
|
|
@@ -4,10 +4,10 @@ from dataclasses import dataclass
|
|
|
4
4
|
from pathlib import Path
|
|
5
5
|
from typing import Callable, Optional, TypedDict
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.v2.interfaces
|
|
8
|
-
from unstructured_ingest.v2.interfaces.upload_stager import UploadStager
|
|
7
|
+
from unstructured_ingest.v2.interfaces import UploadStager
|
|
9
8
|
from unstructured_ingest.v2.logger import logger
|
|
10
9
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
10
|
+
from unstructured_ingest.v2.types.file_data import file_data_from_file
|
|
11
11
|
from unstructured_ingest.v2.utils import serialize_base_model_json
|
|
12
12
|
|
|
13
13
|
STEP_ID = "upload_stage"
|
|
@@ -3,10 +3,10 @@ from dataclasses import dataclass
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Callable, TypedDict
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
|
|
7
6
|
from unstructured_ingest.v2.logger import logger
|
|
8
7
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
9
8
|
from unstructured_ingest.v2.processes.uncompress import Uncompressor
|
|
9
|
+
from unstructured_ingest.v2.types.file_data import file_data_from_file
|
|
10
10
|
|
|
11
11
|
STEP_ID = "uncompress"
|
|
12
12
|
|
|
@@ -3,11 +3,11 @@ from dataclasses import dataclass
|
|
|
3
3
|
from pathlib import Path
|
|
4
4
|
from typing import Callable, Optional, TypedDict
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces
|
|
7
|
-
from unstructured_ingest.v2.interfaces.uploader import UploadContent
|
|
6
|
+
from unstructured_ingest.v2.interfaces import UploadContent
|
|
8
7
|
from unstructured_ingest.v2.logger import logger
|
|
9
8
|
from unstructured_ingest.v2.pipeline.interfaces import BatchPipelineStep
|
|
10
9
|
from unstructured_ingest.v2.pipeline.otel import instrument
|
|
10
|
+
from unstructured_ingest.v2.types.file_data import file_data_from_file
|
|
11
11
|
|
|
12
12
|
STEP_ID = "upload"
|
|
13
13
|
|
|
@@ -6,6 +6,7 @@ from typing import Any, Optional
|
|
|
6
6
|
from pydantic import BaseModel, Field, SecretStr
|
|
7
7
|
|
|
8
8
|
from unstructured_ingest.utils.chunking import assign_and_map_hash_ids
|
|
9
|
+
from unstructured_ingest.utils.data_prep import get_json_data
|
|
9
10
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
10
11
|
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
11
12
|
from unstructured_ingest.v2.logger import logger
|
|
@@ -92,9 +93,11 @@ class Chunker(BaseProcess, ABC):
|
|
|
92
93
|
@requires_dependencies(dependencies=["unstructured"])
|
|
93
94
|
def run(self, elements_filepath: Path, **kwargs: Any) -> list[dict]:
|
|
94
95
|
from unstructured.chunking import dispatch
|
|
95
|
-
from unstructured.staging.base import
|
|
96
|
+
from unstructured.staging.base import elements_from_dicts
|
|
96
97
|
|
|
97
|
-
|
|
98
|
+
element_dicts = get_json_data(elements_filepath)
|
|
99
|
+
|
|
100
|
+
elements = elements_from_dicts(element_dicts=element_dicts)
|
|
98
101
|
if not elements:
|
|
99
102
|
return [e.to_dict() for e in elements]
|
|
100
103
|
local_chunking_strategies = ("basic", "by_title")
|
|
@@ -13,14 +13,13 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
13
13
|
Downloader,
|
|
14
14
|
DownloaderConfig,
|
|
15
15
|
DownloadResponse,
|
|
16
|
-
FileData,
|
|
17
16
|
Indexer,
|
|
18
17
|
IndexerConfig,
|
|
19
|
-
SourceIdentifiers,
|
|
20
18
|
)
|
|
21
19
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
22
20
|
SourceRegistryEntry,
|
|
23
21
|
)
|
|
22
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
24
23
|
|
|
25
24
|
if TYPE_CHECKING:
|
|
26
25
|
from pyairtable import Api
|
|
@@ -21,17 +21,12 @@ from unstructured_ingest.utils.string_and_date_utils import truncate_string_byte
|
|
|
21
21
|
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
22
22
|
from unstructured_ingest.v2.interfaces import (
|
|
23
23
|
AccessConfig,
|
|
24
|
-
BatchFileData,
|
|
25
|
-
BatchItem,
|
|
26
24
|
ConnectionConfig,
|
|
27
25
|
Downloader,
|
|
28
26
|
DownloaderConfig,
|
|
29
27
|
DownloadResponse,
|
|
30
|
-
FileData,
|
|
31
|
-
FileDataSourceMetadata,
|
|
32
28
|
Indexer,
|
|
33
29
|
IndexerConfig,
|
|
34
|
-
SourceIdentifiers,
|
|
35
30
|
Uploader,
|
|
36
31
|
UploaderConfig,
|
|
37
32
|
UploadStager,
|
|
@@ -44,6 +39,13 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
44
39
|
SourceRegistryEntry,
|
|
45
40
|
)
|
|
46
41
|
from unstructured_ingest.v2.processes.connectors.utils import format_and_truncate_orig_elements
|
|
42
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
43
|
+
BatchFileData,
|
|
44
|
+
BatchItem,
|
|
45
|
+
FileData,
|
|
46
|
+
FileDataSourceMetadata,
|
|
47
|
+
SourceIdentifiers,
|
|
48
|
+
)
|
|
47
49
|
|
|
48
50
|
if TYPE_CHECKING:
|
|
49
51
|
from astrapy import AsyncCollection as AstraDBAsyncCollection
|
|
@@ -12,7 +12,6 @@ from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
|
12
12
|
from unstructured_ingest.v2.interfaces import (
|
|
13
13
|
AccessConfig,
|
|
14
14
|
ConnectionConfig,
|
|
15
|
-
FileData,
|
|
16
15
|
Uploader,
|
|
17
16
|
UploaderConfig,
|
|
18
17
|
UploadStager,
|
|
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
23
22
|
DestinationRegistryEntry,
|
|
24
23
|
)
|
|
25
24
|
from unstructured_ingest.v2.processes.connectors.utils import parse_datetime
|
|
25
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
26
26
|
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
27
27
|
|
|
28
28
|
if TYPE_CHECKING:
|
|
@@ -12,7 +12,6 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
12
12
|
from unstructured_ingest.v2.interfaces import (
|
|
13
13
|
AccessConfig,
|
|
14
14
|
ConnectionConfig,
|
|
15
|
-
FileData,
|
|
16
15
|
Uploader,
|
|
17
16
|
UploaderConfig,
|
|
18
17
|
UploadStager,
|
|
@@ -20,6 +19,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
19
|
)
|
|
21
20
|
from unstructured_ingest.v2.logger import logger
|
|
22
21
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
22
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
23
23
|
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
24
24
|
|
|
25
25
|
from .utils import conform_string_to_dict
|
|
@@ -15,17 +15,19 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
15
|
Downloader,
|
|
16
16
|
DownloaderConfig,
|
|
17
17
|
DownloadResponse,
|
|
18
|
-
FileData,
|
|
19
|
-
FileDataSourceMetadata,
|
|
20
18
|
Indexer,
|
|
21
19
|
IndexerConfig,
|
|
22
|
-
SourceIdentifiers,
|
|
23
20
|
download_responses,
|
|
24
21
|
)
|
|
25
22
|
from unstructured_ingest.v2.logger import logger
|
|
26
23
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
27
24
|
SourceRegistryEntry,
|
|
28
25
|
)
|
|
26
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
27
|
+
FileData,
|
|
28
|
+
FileDataSourceMetadata,
|
|
29
|
+
SourceIdentifiers,
|
|
30
|
+
)
|
|
29
31
|
|
|
30
32
|
if TYPE_CHECKING:
|
|
31
33
|
from atlassian import Confluence
|
|
@@ -17,17 +17,12 @@ from unstructured_ingest.utils.data_prep import batch_generator, flatten_dict
|
|
|
17
17
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
18
18
|
from unstructured_ingest.v2.interfaces import (
|
|
19
19
|
AccessConfig,
|
|
20
|
-
BatchFileData,
|
|
21
|
-
BatchItem,
|
|
22
20
|
ConnectionConfig,
|
|
23
21
|
Downloader,
|
|
24
22
|
DownloaderConfig,
|
|
25
23
|
DownloadResponse,
|
|
26
|
-
FileData,
|
|
27
|
-
FileDataSourceMetadata,
|
|
28
24
|
Indexer,
|
|
29
25
|
IndexerConfig,
|
|
30
|
-
SourceIdentifiers,
|
|
31
26
|
Uploader,
|
|
32
27
|
UploaderConfig,
|
|
33
28
|
UploadStager,
|
|
@@ -39,6 +34,13 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
39
34
|
DestinationRegistryEntry,
|
|
40
35
|
SourceRegistryEntry,
|
|
41
36
|
)
|
|
37
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
38
|
+
BatchFileData,
|
|
39
|
+
BatchItem,
|
|
40
|
+
FileData,
|
|
41
|
+
FileDataSourceMetadata,
|
|
42
|
+
SourceIdentifiers,
|
|
43
|
+
)
|
|
42
44
|
|
|
43
45
|
if TYPE_CHECKING:
|
|
44
46
|
from couchbase.cluster import Cluster
|
|
@@ -20,15 +20,17 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
20
|
Downloader,
|
|
21
21
|
DownloaderConfig,
|
|
22
22
|
DownloadResponse,
|
|
23
|
-
FileData,
|
|
24
|
-
FileDataSourceMetadata,
|
|
25
23
|
Indexer,
|
|
26
24
|
IndexerConfig,
|
|
27
|
-
SourceIdentifiers,
|
|
28
25
|
Uploader,
|
|
29
26
|
UploaderConfig,
|
|
30
27
|
)
|
|
31
28
|
from unstructured_ingest.v2.logger import logger
|
|
29
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
30
|
+
FileData,
|
|
31
|
+
FileDataSourceMetadata,
|
|
32
|
+
SourceIdentifiers,
|
|
33
|
+
)
|
|
32
34
|
|
|
33
35
|
if TYPE_CHECKING:
|
|
34
36
|
from databricks.sdk import WorkspaceClient
|
|
@@ -8,7 +8,7 @@ from typing import TYPE_CHECKING, Any, Generator, Optional
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.utils.data_prep import get_data_df, write_data
|
|
11
|
-
from unstructured_ingest.v2.interfaces import
|
|
11
|
+
from unstructured_ingest.v2.interfaces import Uploader, UploaderConfig
|
|
12
12
|
from unstructured_ingest.v2.logger import logger
|
|
13
13
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
14
14
|
DestinationRegistryEntry,
|
|
@@ -19,6 +19,7 @@ from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables imp
|
|
|
19
19
|
DatabricksDeltaTablesUploadStager,
|
|
20
20
|
DatabricksDeltaTablesUploadStagerConfig,
|
|
21
21
|
)
|
|
22
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
22
23
|
|
|
23
24
|
CONNECTOR_TYPE = "databricks_volume_delta_tables"
|
|
24
25
|
|
|
@@ -15,7 +15,6 @@ from unstructured_ingest.utils.table import convert_to_pandas_dataframe
|
|
|
15
15
|
from unstructured_ingest.v2.interfaces import (
|
|
16
16
|
AccessConfig,
|
|
17
17
|
ConnectionConfig,
|
|
18
|
-
FileData,
|
|
19
18
|
Uploader,
|
|
20
19
|
UploaderConfig,
|
|
21
20
|
UploadStager,
|
|
@@ -23,6 +22,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
23
22
|
)
|
|
24
23
|
from unstructured_ingest.v2.logger import logger
|
|
25
24
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
25
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
26
26
|
|
|
27
27
|
CONNECTOR_TYPE = "delta_table"
|
|
28
28
|
|
|
@@ -12,14 +12,16 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
12
12
|
Downloader,
|
|
13
13
|
DownloaderConfig,
|
|
14
14
|
DownloadResponse,
|
|
15
|
-
FileData,
|
|
16
|
-
FileDataSourceMetadata,
|
|
17
15
|
Indexer,
|
|
18
16
|
IndexerConfig,
|
|
19
|
-
SourceIdentifiers,
|
|
20
17
|
)
|
|
21
18
|
from unstructured_ingest.v2.logger import logger
|
|
22
19
|
from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
|
|
20
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
21
|
+
FileData,
|
|
22
|
+
FileDataSourceMetadata,
|
|
23
|
+
SourceIdentifiers,
|
|
24
|
+
)
|
|
23
25
|
|
|
24
26
|
if TYPE_CHECKING:
|
|
25
27
|
from discord import Client as DiscordClient
|
|
@@ -4,7 +4,8 @@ from typing import Any
|
|
|
4
4
|
|
|
5
5
|
from unstructured_ingest.utils.data_prep import get_data, write_data
|
|
6
6
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
7
|
-
from unstructured_ingest.v2.interfaces import
|
|
7
|
+
from unstructured_ingest.v2.interfaces import UploadStager
|
|
8
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
8
9
|
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
9
10
|
|
|
10
11
|
_COLUMNS = (
|
|
@@ -11,7 +11,6 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
11
11
|
from unstructured_ingest.v2.interfaces import (
|
|
12
12
|
AccessConfig,
|
|
13
13
|
ConnectionConfig,
|
|
14
|
-
FileData,
|
|
15
14
|
Uploader,
|
|
16
15
|
UploaderConfig,
|
|
17
16
|
UploadStagerConfig,
|
|
@@ -19,6 +18,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
19
18
|
from unstructured_ingest.v2.logger import logger
|
|
20
19
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
21
20
|
from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUploadStager
|
|
21
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
22
22
|
|
|
23
23
|
if TYPE_CHECKING:
|
|
24
24
|
from duckdb import DuckDBPyConnection as DuckDBConnection
|
|
@@ -12,7 +12,6 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
12
12
|
from unstructured_ingest.v2.interfaces import (
|
|
13
13
|
AccessConfig,
|
|
14
14
|
ConnectionConfig,
|
|
15
|
-
FileData,
|
|
16
15
|
Uploader,
|
|
17
16
|
UploaderConfig,
|
|
18
17
|
UploadStagerConfig,
|
|
@@ -20,6 +19,7 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
20
19
|
from unstructured_ingest.v2.logger import logger
|
|
21
20
|
from unstructured_ingest.v2.processes.connector_registry import DestinationRegistryEntry
|
|
22
21
|
from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUploadStager
|
|
22
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
23
23
|
|
|
24
24
|
if TYPE_CHECKING:
|
|
25
25
|
from duckdb import DuckDBPyConnection as MotherDuckConnection
|
|
@@ -23,17 +23,12 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
23
23
|
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
24
24
|
from unstructured_ingest.v2.interfaces import (
|
|
25
25
|
AccessConfig,
|
|
26
|
-
BatchFileData,
|
|
27
|
-
BatchItem,
|
|
28
26
|
ConnectionConfig,
|
|
29
27
|
Downloader,
|
|
30
28
|
DownloaderConfig,
|
|
31
29
|
DownloadResponse,
|
|
32
|
-
FileData,
|
|
33
|
-
FileDataSourceMetadata,
|
|
34
30
|
Indexer,
|
|
35
31
|
IndexerConfig,
|
|
36
|
-
SourceIdentifiers,
|
|
37
32
|
Uploader,
|
|
38
33
|
UploaderConfig,
|
|
39
34
|
UploadStager,
|
|
@@ -45,6 +40,13 @@ from unstructured_ingest.v2.processes.connector_registry import (
|
|
|
45
40
|
DestinationRegistryEntry,
|
|
46
41
|
SourceRegistryEntry,
|
|
47
42
|
)
|
|
43
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
44
|
+
BatchFileData,
|
|
45
|
+
BatchItem,
|
|
46
|
+
FileData,
|
|
47
|
+
FileDataSourceMetadata,
|
|
48
|
+
SourceIdentifiers,
|
|
49
|
+
)
|
|
48
50
|
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
49
51
|
|
|
50
52
|
if TYPE_CHECKING:
|
|
@@ -9,7 +9,6 @@ from pydantic import Field, Secret
|
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
11
11
|
from unstructured_ingest.v2.errors import ProviderError, UserAuthError, UserError
|
|
12
|
-
from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
|
|
13
12
|
from unstructured_ingest.v2.logger import logger
|
|
14
13
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
15
14
|
DestinationRegistryEntry,
|
|
@@ -30,6 +29,7 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
30
29
|
BlobStoreUploadStager,
|
|
31
30
|
BlobStoreUploadStagerConfig,
|
|
32
31
|
)
|
|
32
|
+
from unstructured_ingest.v2.types.file_data import FileDataSourceMetadata
|
|
33
33
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from adlfs import AzureBlobFileSystem
|
|
@@ -11,7 +11,6 @@ from pydantic.functional_validators import BeforeValidator
|
|
|
11
11
|
|
|
12
12
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
13
|
from unstructured_ingest.v2.errors import ProviderError, UserAuthError, UserError
|
|
14
|
-
from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
|
|
15
14
|
from unstructured_ingest.v2.logger import logger
|
|
16
15
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
17
16
|
DestinationRegistryEntry,
|
|
@@ -32,6 +31,7 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
32
31
|
BlobStoreUploadStager,
|
|
33
32
|
BlobStoreUploadStagerConfig,
|
|
34
33
|
)
|
|
34
|
+
from unstructured_ingest.v2.types.file_data import FileDataSourceMetadata
|
|
35
35
|
|
|
36
36
|
if TYPE_CHECKING:
|
|
37
37
|
from boxfs import BoxFileSystem
|
|
@@ -15,7 +15,6 @@ from unstructured_ingest.v2.errors import (
|
|
|
15
15
|
from unstructured_ingest.v2.errors import (
|
|
16
16
|
RateLimitError as CustomRateLimitError,
|
|
17
17
|
)
|
|
18
|
-
from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
|
|
19
18
|
from unstructured_ingest.v2.logger import logger
|
|
20
19
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
21
20
|
DestinationRegistryEntry,
|
|
@@ -35,6 +34,7 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
35
34
|
BlobStoreUploadStager,
|
|
36
35
|
BlobStoreUploadStagerConfig,
|
|
37
36
|
)
|
|
37
|
+
from unstructured_ingest.v2.types.file_data import FileDataSourceMetadata
|
|
38
38
|
|
|
39
39
|
if TYPE_CHECKING:
|
|
40
40
|
pass
|
|
@@ -18,16 +18,18 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
18
18
|
Downloader,
|
|
19
19
|
DownloaderConfig,
|
|
20
20
|
DownloadResponse,
|
|
21
|
-
FileData,
|
|
22
|
-
FileDataSourceMetadata,
|
|
23
21
|
Indexer,
|
|
24
22
|
IndexerConfig,
|
|
25
|
-
SourceIdentifiers,
|
|
26
23
|
Uploader,
|
|
27
24
|
UploaderConfig,
|
|
28
25
|
)
|
|
29
26
|
from unstructured_ingest.v2.logger import logger
|
|
30
27
|
from unstructured_ingest.v2.processes.connectors.fsspec.utils import sterilize_dict
|
|
28
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
29
|
+
FileData,
|
|
30
|
+
FileDataSourceMetadata,
|
|
31
|
+
SourceIdentifiers,
|
|
32
|
+
)
|
|
31
33
|
|
|
32
34
|
if TYPE_CHECKING:
|
|
33
35
|
from fsspec import AbstractFileSystem
|
|
@@ -12,7 +12,6 @@ from pydantic import Field, Secret
|
|
|
12
12
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
13
13
|
from unstructured_ingest.utils.string_and_date_utils import json_to_dict
|
|
14
14
|
from unstructured_ingest.v2.errors import ProviderError, UserError
|
|
15
|
-
from unstructured_ingest.v2.interfaces import FileDataSourceMetadata
|
|
16
15
|
from unstructured_ingest.v2.logger import logger
|
|
17
16
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
18
17
|
DestinationRegistryEntry,
|
|
@@ -32,6 +31,7 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
32
31
|
BlobStoreUploadStager,
|
|
33
32
|
BlobStoreUploadStagerConfig,
|
|
34
33
|
)
|
|
34
|
+
from unstructured_ingest.v2.types.file_data import FileDataSourceMetadata
|
|
35
35
|
|
|
36
36
|
if TYPE_CHECKING:
|
|
37
37
|
from gcsfs import GCSFileSystem
|
|
@@ -8,9 +8,6 @@ from pydantic import Field, Secret
|
|
|
8
8
|
|
|
9
9
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
10
10
|
from unstructured_ingest.v2.errors import ProviderError, UserAuthError, UserError
|
|
11
|
-
from unstructured_ingest.v2.interfaces import (
|
|
12
|
-
FileDataSourceMetadata,
|
|
13
|
-
)
|
|
14
11
|
from unstructured_ingest.v2.logger import logger
|
|
15
12
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
16
13
|
DestinationRegistryEntry,
|
|
@@ -30,6 +27,9 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
30
27
|
BlobStoreUploadStager,
|
|
31
28
|
BlobStoreUploadStagerConfig,
|
|
32
29
|
)
|
|
30
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
31
|
+
FileDataSourceMetadata,
|
|
32
|
+
)
|
|
33
33
|
|
|
34
34
|
CONNECTOR_TYPE = "s3"
|
|
35
35
|
|
|
@@ -11,7 +11,6 @@ from urllib.parse import urlparse
|
|
|
11
11
|
from pydantic import Field, Secret
|
|
12
12
|
|
|
13
13
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
14
|
-
from unstructured_ingest.v2.interfaces import FileData, FileDataSourceMetadata
|
|
15
14
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
16
15
|
DestinationRegistryEntry,
|
|
17
16
|
SourceRegistryEntry,
|
|
@@ -30,6 +29,7 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
30
29
|
BlobStoreUploadStager,
|
|
31
30
|
BlobStoreUploadStagerConfig,
|
|
32
31
|
)
|
|
32
|
+
from unstructured_ingest.v2.types.file_data import FileData, FileDataSourceMetadata
|
|
33
33
|
|
|
34
34
|
if TYPE_CHECKING:
|
|
35
35
|
from fsspec.implementations.sftp import SFTPFileSystem
|
|
@@ -16,14 +16,16 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
16
16
|
Downloader,
|
|
17
17
|
DownloaderConfig,
|
|
18
18
|
DownloadResponse,
|
|
19
|
-
FileData,
|
|
20
|
-
FileDataSourceMetadata,
|
|
21
19
|
Indexer,
|
|
22
20
|
IndexerConfig,
|
|
23
|
-
SourceIdentifiers,
|
|
24
21
|
)
|
|
25
22
|
from unstructured_ingest.v2.logger import logger
|
|
26
23
|
from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
|
|
24
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
25
|
+
FileData,
|
|
26
|
+
FileDataSourceMetadata,
|
|
27
|
+
SourceIdentifiers,
|
|
28
|
+
)
|
|
27
29
|
|
|
28
30
|
CONNECTOR_TYPE = "gitlab"
|
|
29
31
|
if TYPE_CHECKING:
|
|
@@ -21,15 +21,17 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
21
21
|
Downloader,
|
|
22
22
|
DownloaderConfig,
|
|
23
23
|
DownloadResponse,
|
|
24
|
-
FileData,
|
|
25
|
-
FileDataSourceMetadata,
|
|
26
24
|
Indexer,
|
|
27
25
|
IndexerConfig,
|
|
28
|
-
SourceIdentifiers,
|
|
29
26
|
)
|
|
30
27
|
from unstructured_ingest.v2.logger import logger
|
|
31
28
|
from unstructured_ingest.v2.processes.connector_registry import SourceRegistryEntry
|
|
32
29
|
from unstructured_ingest.v2.processes.connectors.utils import conform_string_to_dict
|
|
30
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
31
|
+
FileData,
|
|
32
|
+
FileDataSourceMetadata,
|
|
33
|
+
SourceIdentifiers,
|
|
34
|
+
)
|
|
33
35
|
|
|
34
36
|
CONNECTOR_TYPE = "google_drive"
|
|
35
37
|
|
|
@@ -15,7 +15,6 @@ from unstructured_ingest.v2.errors import ProviderError, UserAuthError, UserErro
|
|
|
15
15
|
from unstructured_ingest.v2.interfaces import (
|
|
16
16
|
AccessConfig,
|
|
17
17
|
ConnectionConfig,
|
|
18
|
-
FileData,
|
|
19
18
|
UploaderConfig,
|
|
20
19
|
)
|
|
21
20
|
from unstructured_ingest.v2.logger import logger
|
|
@@ -27,6 +26,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
|
27
26
|
SQLUploadStager,
|
|
28
27
|
SQLUploadStagerConfig,
|
|
29
28
|
)
|
|
29
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
30
30
|
|
|
31
31
|
if TYPE_CHECKING:
|
|
32
32
|
from pyarrow import Table as ArrowTable
|
|
@@ -15,16 +15,18 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
15
15
|
Downloader,
|
|
16
16
|
DownloaderConfig,
|
|
17
17
|
DownloadResponse,
|
|
18
|
-
FileData,
|
|
19
|
-
FileDataSourceMetadata,
|
|
20
18
|
Indexer,
|
|
21
19
|
IndexerConfig,
|
|
22
|
-
SourceIdentifiers,
|
|
23
20
|
)
|
|
24
21
|
from unstructured_ingest.v2.logger import logger
|
|
25
22
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
26
23
|
SourceRegistryEntry,
|
|
27
24
|
)
|
|
25
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
26
|
+
FileData,
|
|
27
|
+
FileDataSourceMetadata,
|
|
28
|
+
SourceIdentifiers,
|
|
29
|
+
)
|
|
28
30
|
|
|
29
31
|
if TYPE_CHECKING:
|
|
30
32
|
from atlassian import Jira
|
|
@@ -21,15 +21,17 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
21
21
|
Downloader,
|
|
22
22
|
DownloaderConfig,
|
|
23
23
|
DownloadResponse,
|
|
24
|
-
FileData,
|
|
25
|
-
FileDataSourceMetadata,
|
|
26
24
|
Indexer,
|
|
27
25
|
IndexerConfig,
|
|
28
|
-
SourceIdentifiers,
|
|
29
26
|
Uploader,
|
|
30
27
|
UploaderConfig,
|
|
31
28
|
)
|
|
32
29
|
from unstructured_ingest.v2.logger import logger
|
|
30
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
31
|
+
FileData,
|
|
32
|
+
FileDataSourceMetadata,
|
|
33
|
+
SourceIdentifiers,
|
|
34
|
+
)
|
|
33
35
|
|
|
34
36
|
if TYPE_CHECKING:
|
|
35
37
|
from confluent_kafka import Consumer, Producer
|
|
@@ -11,7 +11,6 @@ from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
|
11
11
|
from unstructured_ingest.v2.interfaces import (
|
|
12
12
|
AccessConfig,
|
|
13
13
|
ConnectionConfig,
|
|
14
|
-
FileData,
|
|
15
14
|
Uploader,
|
|
16
15
|
UploaderConfig,
|
|
17
16
|
UploadStager,
|
|
@@ -21,6 +20,7 @@ from unstructured_ingest.v2.logger import logger
|
|
|
21
20
|
from unstructured_ingest.v2.processes.connector_registry import (
|
|
22
21
|
DestinationRegistryEntry,
|
|
23
22
|
)
|
|
23
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
24
24
|
from unstructured_ingest.v2.utils import get_enhanced_element_id
|
|
25
25
|
|
|
26
26
|
if TYPE_CHECKING:
|
|
@@ -15,10 +15,14 @@ from unstructured_ingest.logger import logger
|
|
|
15
15
|
from unstructured_ingest.utils.data_prep import flatten_dict
|
|
16
16
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
17
17
|
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
18
|
-
from unstructured_ingest.v2.interfaces
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
from unstructured_ingest.v2.interfaces import (
|
|
19
|
+
ConnectionConfig,
|
|
20
|
+
Uploader,
|
|
21
|
+
UploaderConfig,
|
|
22
|
+
UploadStager,
|
|
23
|
+
UploadStagerConfig,
|
|
24
|
+
)
|
|
25
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
22
26
|
|
|
23
27
|
CONNECTOR_TYPE = "lancedb"
|
|
24
28
|
|
|
@@ -14,11 +14,8 @@ from unstructured_ingest.v2.interfaces import (
|
|
|
14
14
|
Downloader,
|
|
15
15
|
DownloaderConfig,
|
|
16
16
|
DownloadResponse,
|
|
17
|
-
FileData,
|
|
18
|
-
FileDataSourceMetadata,
|
|
19
17
|
Indexer,
|
|
20
18
|
IndexerConfig,
|
|
21
|
-
SourceIdentifiers,
|
|
22
19
|
Uploader,
|
|
23
20
|
UploaderConfig,
|
|
24
21
|
)
|
|
@@ -31,6 +28,11 @@ from unstructured_ingest.v2.processes.utils.blob_storage import (
|
|
|
31
28
|
BlobStoreUploadStager,
|
|
32
29
|
BlobStoreUploadStagerConfig,
|
|
33
30
|
)
|
|
31
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
32
|
+
FileData,
|
|
33
|
+
FileDataSourceMetadata,
|
|
34
|
+
SourceIdentifiers,
|
|
35
|
+
)
|
|
34
36
|
|
|
35
37
|
CONNECTOR_TYPE = "local"
|
|
36
38
|
|