unstructured-ingest 0.5.25__py3-none-any.whl → 0.6.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/databricks/test_volumes_native.py +1 -1
- test/integration/connectors/duckdb/test_duckdb.py +1 -1
- test/integration/connectors/duckdb/test_motherduck.py +1 -1
- test/integration/connectors/elasticsearch/test_elasticsearch.py +1 -1
- test/integration/connectors/elasticsearch/test_opensearch.py +1 -1
- test/integration/connectors/sql/test_databricks_delta_tables.py +1 -1
- test/integration/connectors/sql/test_postgres.py +1 -1
- test/integration/connectors/sql/test_singlestore.py +1 -1
- test/integration/connectors/sql/test_snowflake.py +1 -1
- test/integration/connectors/sql/test_sqlite.py +1 -1
- test/integration/connectors/test_astradb.py +1 -1
- test/integration/connectors/test_azure_ai_search.py +1 -1
- test/integration/connectors/test_chroma.py +1 -1
- test/integration/connectors/test_delta_table.py +1 -1
- test/integration/connectors/test_lancedb.py +1 -1
- test/integration/connectors/test_milvus.py +1 -1
- test/integration/connectors/test_mongodb.py +1 -1
- test/integration/connectors/test_neo4j.py +5 -5
- test/integration/connectors/test_onedrive.py +1 -1
- test/integration/connectors/test_pinecone.py +1 -1
- test/integration/connectors/test_qdrant.py +1 -1
- test/integration/connectors/test_redis.py +1 -1
- test/integration/connectors/test_s3.py +1 -1
- test/integration/connectors/test_vectara.py +1 -1
- test/integration/connectors/utils/validation/destination.py +2 -1
- test/integration/connectors/utils/validation/source.py +2 -1
- test/integration/connectors/weaviate/test_local.py +1 -1
- test/unit/test_html.py +1 -1
- test/unit/v2/connectors/ibm_watsonx/test_ibm_watsonx_s3.py +1 -1
- test/unit/v2/connectors/motherduck/test_base.py +1 -2
- test/unit/v2/connectors/sql/test_sql.py +1 -1
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/utils/html.py +2 -1
- unstructured_ingest/v2/interfaces/__init__.py +0 -13
- unstructured_ingest/v2/interfaces/downloader.py +1 -1
- unstructured_ingest/v2/interfaces/indexer.py +1 -1
- unstructured_ingest/v2/interfaces/upload_stager.py +2 -2
- unstructured_ingest/v2/interfaces/uploader.py +2 -3
- unstructured_ingest/v2/pipeline/steps/chunk.py +1 -2
- unstructured_ingest/v2/pipeline/steps/download.py +2 -3
- unstructured_ingest/v2/pipeline/steps/embed.py +1 -2
- unstructured_ingest/v2/pipeline/steps/filter.py +1 -1
- unstructured_ingest/v2/pipeline/steps/partition.py +1 -2
- unstructured_ingest/v2/pipeline/steps/stage.py +2 -2
- unstructured_ingest/v2/pipeline/steps/uncompress.py +1 -1
- unstructured_ingest/v2/pipeline/steps/upload.py +2 -2
- unstructured_ingest/v2/processes/connectors/airtable.py +1 -2
- unstructured_ingest/v2/processes/connectors/astradb.py +7 -5
- unstructured_ingest/v2/processes/connectors/azure_ai_search.py +1 -1
- unstructured_ingest/v2/processes/connectors/chroma.py +1 -1
- unstructured_ingest/v2/processes/connectors/confluence.py +5 -3
- unstructured_ingest/v2/processes/connectors/couchbase.py +7 -5
- unstructured_ingest/v2/processes/connectors/databricks/volumes.py +5 -3
- unstructured_ingest/v2/processes/connectors/databricks/volumes_table.py +2 -1
- unstructured_ingest/v2/processes/connectors/delta_table.py +1 -1
- unstructured_ingest/v2/processes/connectors/discord.py +5 -3
- unstructured_ingest/v2/processes/connectors/duckdb/base.py +2 -1
- unstructured_ingest/v2/processes/connectors/duckdb/duckdb.py +1 -1
- unstructured_ingest/v2/processes/connectors/duckdb/motherduck.py +1 -1
- unstructured_ingest/v2/processes/connectors/elasticsearch/elasticsearch.py +7 -5
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +5 -3
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +1 -1
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +3 -3
- unstructured_ingest/v2/processes/connectors/fsspec/sftp.py +1 -1
- unstructured_ingest/v2/processes/connectors/gitlab.py +5 -3
- unstructured_ingest/v2/processes/connectors/google_drive.py +5 -3
- unstructured_ingest/v2/processes/connectors/ibm_watsonx/ibm_watsonx_s3.py +1 -1
- unstructured_ingest/v2/processes/connectors/jira.py +5 -3
- unstructured_ingest/v2/processes/connectors/kafka/kafka.py +5 -3
- unstructured_ingest/v2/processes/connectors/kdbai.py +1 -1
- unstructured_ingest/v2/processes/connectors/lancedb/lancedb.py +8 -4
- unstructured_ingest/v2/processes/connectors/local.py +5 -3
- unstructured_ingest/v2/processes/connectors/milvus.py +1 -1
- unstructured_ingest/v2/processes/connectors/mongodb.py +7 -5
- unstructured_ingest/v2/processes/connectors/neo4j.py +1 -1
- unstructured_ingest/v2/processes/connectors/notion/connector.py +5 -3
- unstructured_ingest/v2/processes/connectors/onedrive.py +5 -3
- unstructured_ingest/v2/processes/connectors/outlook.py +5 -2
- unstructured_ingest/v2/processes/connectors/pinecone.py +1 -1
- unstructured_ingest/v2/processes/connectors/qdrant/qdrant.py +1 -1
- unstructured_ingest/v2/processes/connectors/redisdb.py +1 -1
- unstructured_ingest/v2/processes/connectors/salesforce.py +5 -3
- unstructured_ingest/v2/processes/connectors/sharepoint.py +3 -3
- unstructured_ingest/v2/processes/connectors/slack.py +2 -2
- unstructured_ingest/v2/processes/connectors/sql/databricks_delta_tables.py +1 -1
- unstructured_ingest/v2/processes/connectors/sql/snowflake.py +1 -1
- unstructured_ingest/v2/processes/connectors/sql/sql.py +7 -5
- unstructured_ingest/v2/processes/connectors/sql/vastdb.py +3 -3
- unstructured_ingest/v2/processes/connectors/vectara.py +1 -1
- unstructured_ingest/v2/processes/connectors/weaviate/weaviate.py +1 -1
- unstructured_ingest/v2/processes/connectors/zendesk/zendesk.py +5 -3
- unstructured_ingest/v2/processes/filter.py +1 -1
- unstructured_ingest/v2/processes/uncompress.py +1 -1
- unstructured_ingest/v2/processes/utils/blob_storage.py +2 -1
- unstructured_ingest/v2/utils.py +1 -1
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.0.dist-info}/METADATA +94 -94
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.0.dist-info}/RECORD +104 -105
- unstructured_ingest/v2/interfaces/file_data.py +0 -13
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.0.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.0.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.25.dist-info → unstructured_ingest-0.6.0.dist-info}/top_level.txt +0 -0
|
@@ -21,7 +21,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
21
21
|
)
|
|
22
22
|
from test.integration.utils import requires_env
|
|
23
23
|
from unstructured_ingest.v2.errors import UserAuthError, UserError
|
|
24
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
25
24
|
from unstructured_ingest.v2.processes.connectors.databricks.volumes_native import (
|
|
26
25
|
CONNECTOR_TYPE,
|
|
27
26
|
DatabricksNativeVolumesAccessConfig,
|
|
@@ -33,6 +32,7 @@ from unstructured_ingest.v2.processes.connectors.databricks.volumes_native impor
|
|
|
33
32
|
DatabricksNativeVolumesUploader,
|
|
34
33
|
DatabricksNativeVolumesUploaderConfig,
|
|
35
34
|
)
|
|
35
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
36
36
|
|
|
37
37
|
|
|
38
38
|
@dataclass
|
|
@@ -10,7 +10,6 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
10
10
|
StagerValidationConfigs,
|
|
11
11
|
stager_validation,
|
|
12
12
|
)
|
|
13
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
14
13
|
from unstructured_ingest.v2.processes.connectors.duckdb.duckdb import (
|
|
15
14
|
CONNECTOR_TYPE,
|
|
16
15
|
DuckDBConnectionConfig,
|
|
@@ -18,6 +17,7 @@ from unstructured_ingest.v2.processes.connectors.duckdb.duckdb import (
|
|
|
18
17
|
DuckDBUploaderConfig,
|
|
19
18
|
DuckDBUploadStager,
|
|
20
19
|
)
|
|
20
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@pytest.fixture
|
|
@@ -9,7 +9,6 @@ import pytest
|
|
|
9
9
|
|
|
10
10
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG
|
|
11
11
|
from test.integration.utils import requires_env
|
|
12
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
13
12
|
from unstructured_ingest.v2.processes.connectors.duckdb.motherduck import (
|
|
14
13
|
CONNECTOR_TYPE,
|
|
15
14
|
MotherDuckAccessConfig,
|
|
@@ -18,6 +17,7 @@ from unstructured_ingest.v2.processes.connectors.duckdb.motherduck import (
|
|
|
18
17
|
MotherDuckUploaderConfig,
|
|
19
18
|
MotherDuckUploadStager,
|
|
20
19
|
)
|
|
20
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
21
21
|
|
|
22
22
|
|
|
23
23
|
@pytest.fixture
|
|
@@ -22,7 +22,7 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
22
22
|
source_connector_validation,
|
|
23
23
|
)
|
|
24
24
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
25
|
-
from unstructured_ingest.v2.
|
|
25
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
26
26
|
from unstructured_ingest.v2.processes.connectors.elasticsearch.elasticsearch import (
|
|
27
27
|
CONNECTOR_TYPE,
|
|
28
28
|
ElasticsearchAccessConfig,
|
|
@@ -24,7 +24,6 @@ from unstructured_ingest.error import (
|
|
|
24
24
|
DestinationConnectionError,
|
|
25
25
|
SourceConnectionError,
|
|
26
26
|
)
|
|
27
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
28
27
|
from unstructured_ingest.v2.processes.connectors.elasticsearch.opensearch import (
|
|
29
28
|
CONNECTOR_TYPE,
|
|
30
29
|
OpenSearchAccessConfig,
|
|
@@ -38,6 +37,7 @@ from unstructured_ingest.v2.processes.connectors.elasticsearch.opensearch import
|
|
|
38
37
|
OpenSearchUploadStager,
|
|
39
38
|
OpenSearchUploadStagerConfig,
|
|
40
39
|
)
|
|
40
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
41
41
|
|
|
42
42
|
SOURCE_INDEX_NAME = "movies"
|
|
43
43
|
DESTINATION_INDEX_NAME = "elements"
|
|
@@ -14,7 +14,6 @@ from pytest_mock import MockerFixture
|
|
|
14
14
|
|
|
15
15
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG, env_setup_path
|
|
16
16
|
from test.integration.utils import requires_env
|
|
17
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
18
17
|
from unstructured_ingest.v2.logger import logger
|
|
19
18
|
from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables import (
|
|
20
19
|
CONNECTOR_TYPE,
|
|
@@ -24,6 +23,7 @@ from unstructured_ingest.v2.processes.connectors.sql.databricks_delta_tables imp
|
|
|
24
23
|
DatabricksDeltaTablesUploaderConfig,
|
|
25
24
|
DatabricksDeltaTablesUploadStager,
|
|
26
25
|
)
|
|
26
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
27
27
|
|
|
28
28
|
CATALOG = "utic-dev-tech-fixtures"
|
|
29
29
|
|
|
@@ -20,7 +20,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
SourceValidationConfigs,
|
|
21
21
|
source_connector_validation,
|
|
22
22
|
)
|
|
23
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
24
23
|
from unstructured_ingest.v2.processes.connectors.sql.postgres import (
|
|
25
24
|
CONNECTOR_TYPE,
|
|
26
25
|
PostgresAccessConfig,
|
|
@@ -32,6 +31,7 @@ from unstructured_ingest.v2.processes.connectors.sql.postgres import (
|
|
|
32
31
|
PostgresUploader,
|
|
33
32
|
PostgresUploadStager,
|
|
34
33
|
)
|
|
34
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
35
35
|
|
|
36
36
|
SEED_DATA_ROWS = 10
|
|
37
37
|
|
|
@@ -20,7 +20,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
SourceValidationConfigs,
|
|
21
21
|
source_connector_validation,
|
|
22
22
|
)
|
|
23
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
24
23
|
from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
|
|
25
24
|
CONNECTOR_TYPE,
|
|
26
25
|
SingleStoreAccessConfig,
|
|
@@ -33,6 +32,7 @@ from unstructured_ingest.v2.processes.connectors.sql.singlestore import (
|
|
|
33
32
|
SingleStoreUploaderConfig,
|
|
34
33
|
SingleStoreUploadStager,
|
|
35
34
|
)
|
|
35
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
36
36
|
|
|
37
37
|
SEED_DATA_ROWS = 10
|
|
38
38
|
|
|
@@ -22,7 +22,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
22
22
|
source_connector_validation,
|
|
23
23
|
)
|
|
24
24
|
from test.integration.utils import requires_env
|
|
25
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
26
25
|
from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
|
|
27
26
|
CONNECTOR_TYPE,
|
|
28
27
|
SnowflakeAccessConfig,
|
|
@@ -34,6 +33,7 @@ from unstructured_ingest.v2.processes.connectors.sql.snowflake import (
|
|
|
34
33
|
SnowflakeUploader,
|
|
35
34
|
SnowflakeUploadStager,
|
|
36
35
|
)
|
|
36
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
37
37
|
|
|
38
38
|
SEED_DATA_ROWS = 20
|
|
39
39
|
|
|
@@ -20,7 +20,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
SourceValidationConfigs,
|
|
21
21
|
source_connector_validation,
|
|
22
22
|
)
|
|
23
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
24
23
|
from unstructured_ingest.v2.processes.connectors.sql.sqlite import (
|
|
25
24
|
CONNECTOR_TYPE,
|
|
26
25
|
SQLiteConnectionConfig,
|
|
@@ -31,6 +30,7 @@ from unstructured_ingest.v2.processes.connectors.sql.sqlite import (
|
|
|
31
30
|
SQLiteUploader,
|
|
32
31
|
SQLiteUploadStager,
|
|
33
32
|
)
|
|
33
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
34
34
|
|
|
35
35
|
SEED_DATA_ROWS = 10
|
|
36
36
|
|
|
@@ -20,7 +20,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
source_connector_validation,
|
|
21
21
|
)
|
|
22
22
|
from test.integration.utils import requires_env
|
|
23
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
24
23
|
from unstructured_ingest.v2.processes.connectors.astradb import (
|
|
25
24
|
CONNECTOR_TYPE,
|
|
26
25
|
AstraDBAccessConfig,
|
|
@@ -36,6 +35,7 @@ from unstructured_ingest.v2.processes.connectors.astradb import (
|
|
|
36
35
|
DestinationConnectionError,
|
|
37
36
|
SourceConnectionError,
|
|
38
37
|
)
|
|
38
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
39
39
|
|
|
40
40
|
EXISTENT_COLLECTION_NAME = "ingest_test_src"
|
|
41
41
|
NONEXISTENT_COLLECTION_NAME = "nonexistant"
|
|
@@ -29,7 +29,6 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
29
29
|
stager_validation,
|
|
30
30
|
)
|
|
31
31
|
from test.integration.utils import requires_env
|
|
32
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
33
32
|
from unstructured_ingest.v2.processes.connectors.azure_ai_search import (
|
|
34
33
|
CONNECTOR_TYPE,
|
|
35
34
|
RECORD_ID_LABEL,
|
|
@@ -40,6 +39,7 @@ from unstructured_ingest.v2.processes.connectors.azure_ai_search import (
|
|
|
40
39
|
AzureAISearchUploadStager,
|
|
41
40
|
AzureAISearchUploadStagerConfig,
|
|
42
41
|
)
|
|
42
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
43
43
|
|
|
44
44
|
repo_path = Path(__file__).parent.resolve()
|
|
45
45
|
|
|
@@ -27,7 +27,7 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
27
27
|
StagerValidationConfigs,
|
|
28
28
|
stager_validation,
|
|
29
29
|
)
|
|
30
|
-
from unstructured_ingest.v2.
|
|
30
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
31
31
|
from unstructured_ingest.v2.processes.connectors.chroma import (
|
|
32
32
|
CONNECTOR_TYPE,
|
|
33
33
|
ChromaConnectionConfig,
|
|
@@ -8,7 +8,6 @@ from fsspec import get_filesystem_class
|
|
|
8
8
|
|
|
9
9
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, SQL_TAG
|
|
10
10
|
from test.integration.utils import requires_env
|
|
11
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
12
11
|
from unstructured_ingest.v2.processes.connectors.delta_table import (
|
|
13
12
|
CONNECTOR_TYPE,
|
|
14
13
|
DeltaTableAccessConfig,
|
|
@@ -18,6 +17,7 @@ from unstructured_ingest.v2.processes.connectors.delta_table import (
|
|
|
18
17
|
DeltaTableUploadStager,
|
|
19
18
|
DeltaTableUploadStagerConfig,
|
|
20
19
|
)
|
|
20
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
21
21
|
|
|
22
22
|
multiprocessing.set_start_method("spawn")
|
|
23
23
|
|
|
@@ -13,7 +13,6 @@ from upath import UPath
|
|
|
13
13
|
|
|
14
14
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
|
|
15
15
|
from unstructured_ingest.v2.constants import RECORD_ID_LABEL
|
|
16
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
17
16
|
from unstructured_ingest.v2.processes.connectors.lancedb.aws import (
|
|
18
17
|
LanceDBAwsAccessConfig,
|
|
19
18
|
LanceDBAwsConnectionConfig,
|
|
@@ -39,6 +38,7 @@ from unstructured_ingest.v2.processes.connectors.lancedb.local import (
|
|
|
39
38
|
LanceDBLocalConnectionConfig,
|
|
40
39
|
LanceDBLocalUploader,
|
|
41
40
|
)
|
|
41
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
42
42
|
|
|
43
43
|
DATABASE_NAME = "database"
|
|
44
44
|
TABLE_NAME = "elements"
|
|
@@ -25,7 +25,6 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
25
25
|
stager_validation,
|
|
26
26
|
)
|
|
27
27
|
from unstructured_ingest.error import DestinationConnectionError
|
|
28
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
29
28
|
from unstructured_ingest.v2.processes.connectors.milvus import (
|
|
30
29
|
CONNECTOR_TYPE,
|
|
31
30
|
MilvusConnectionConfig,
|
|
@@ -33,6 +32,7 @@ from unstructured_ingest.v2.processes.connectors.milvus import (
|
|
|
33
32
|
MilvusUploaderConfig,
|
|
34
33
|
MilvusUploadStager,
|
|
35
34
|
)
|
|
35
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
36
36
|
|
|
37
37
|
DB_NAME = "test_database"
|
|
38
38
|
EXISTENT_COLLECTION_NAME = "test_collection"
|
|
@@ -20,7 +20,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
20
20
|
)
|
|
21
21
|
from test.integration.utils import requires_env
|
|
22
22
|
from unstructured_ingest.error import DestinationConnectionError, SourceConnectionError
|
|
23
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
24
23
|
from unstructured_ingest.v2.processes.connectors.mongodb import (
|
|
25
24
|
CONNECTOR_TYPE,
|
|
26
25
|
MongoDBAccessConfig,
|
|
@@ -32,6 +31,7 @@ from unstructured_ingest.v2.processes.connectors.mongodb import (
|
|
|
32
31
|
MongoDBUploader,
|
|
33
32
|
MongoDBUploaderConfig,
|
|
34
33
|
)
|
|
34
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
35
35
|
|
|
36
36
|
SOURCE_COLLECTION = "sample-mongodb-data"
|
|
37
37
|
|
|
@@ -13,11 +13,6 @@ from test.integration.connectors.utils.constants import DESTINATION_TAG, GRAPH_D
|
|
|
13
13
|
from test.integration.connectors.utils.docker import container_context
|
|
14
14
|
from unstructured_ingest.error import DestinationConnectionError
|
|
15
15
|
from unstructured_ingest.utils.chunking import elements_from_base64_gzipped_json
|
|
16
|
-
from unstructured_ingest.v2.interfaces.file_data import (
|
|
17
|
-
FileData,
|
|
18
|
-
FileDataSourceMetadata,
|
|
19
|
-
SourceIdentifiers,
|
|
20
|
-
)
|
|
21
16
|
from unstructured_ingest.v2.processes.connectors.neo4j import (
|
|
22
17
|
CONNECTOR_TYPE,
|
|
23
18
|
Label,
|
|
@@ -28,6 +23,11 @@ from unstructured_ingest.v2.processes.connectors.neo4j import (
|
|
|
28
23
|
Neo4jUploadStager,
|
|
29
24
|
Relationship,
|
|
30
25
|
)
|
|
26
|
+
from unstructured_ingest.v2.types.file_data import (
|
|
27
|
+
FileData,
|
|
28
|
+
FileDataSourceMetadata,
|
|
29
|
+
SourceIdentifiers,
|
|
30
|
+
)
|
|
31
31
|
|
|
32
32
|
USERNAME = "neo4j"
|
|
33
33
|
PASSWORD = "password"
|
|
@@ -15,7 +15,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
15
15
|
source_connector_validation,
|
|
16
16
|
)
|
|
17
17
|
from test.integration.utils import requires_env
|
|
18
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
19
18
|
from unstructured_ingest.v2.processes.connectors.onedrive import (
|
|
20
19
|
CONNECTOR_TYPE,
|
|
21
20
|
OnedriveAccessConfig,
|
|
@@ -27,6 +26,7 @@ from unstructured_ingest.v2.processes.connectors.onedrive import (
|
|
|
27
26
|
OnedriveUploader,
|
|
28
27
|
OnedriveUploaderConfig,
|
|
29
28
|
)
|
|
29
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
30
30
|
|
|
31
31
|
|
|
32
32
|
@pytest.fixture
|
|
@@ -19,7 +19,6 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
19
19
|
)
|
|
20
20
|
from test.integration.utils import requires_env
|
|
21
21
|
from unstructured_ingest.error import DestinationConnectionError
|
|
22
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
23
22
|
from unstructured_ingest.v2.logger import logger
|
|
24
23
|
from unstructured_ingest.v2.processes.connectors.pinecone import (
|
|
25
24
|
CONNECTOR_TYPE,
|
|
@@ -31,6 +30,7 @@ from unstructured_ingest.v2.processes.connectors.pinecone import (
|
|
|
31
30
|
PineconeUploadStager,
|
|
32
31
|
PineconeUploadStagerConfig,
|
|
33
32
|
)
|
|
33
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
34
34
|
|
|
35
35
|
METADATA_BYTES_LIMIT = (
|
|
36
36
|
40960 # 40KB https://docs.pinecone.io/reference/quotas-and-limits#hard-limits
|
|
@@ -16,7 +16,6 @@ from test.integration.connectors.utils.validation.destination import (
|
|
|
16
16
|
stager_validation,
|
|
17
17
|
)
|
|
18
18
|
from test.integration.utils import requires_env
|
|
19
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
20
19
|
from unstructured_ingest.v2.processes.connectors.qdrant.cloud import (
|
|
21
20
|
CloudQdrantAccessConfig,
|
|
22
21
|
CloudQdrantConnectionConfig,
|
|
@@ -45,6 +44,7 @@ from unstructured_ingest.v2.processes.connectors.qdrant.server import (
|
|
|
45
44
|
ServerQdrantUploadStager,
|
|
46
45
|
ServerQdrantUploadStagerConfig,
|
|
47
46
|
)
|
|
47
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
48
48
|
|
|
49
49
|
COLLECTION_NAME = f"test-coll-{uuid.uuid4().hex[:12]}"
|
|
50
50
|
VECTORS_CONFIG = {"size": 384, "distance": "Cosine"}
|
|
@@ -11,7 +11,6 @@ from redis.asyncio import Redis, from_url
|
|
|
11
11
|
|
|
12
12
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG
|
|
13
13
|
from test.integration.utils import requires_env
|
|
14
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
15
14
|
from unstructured_ingest.v2.processes.connectors.redisdb import (
|
|
16
15
|
CONNECTOR_TYPE as REDIS_CONNECTOR_TYPE,
|
|
17
16
|
)
|
|
@@ -21,6 +20,7 @@ from unstructured_ingest.v2.processes.connectors.redisdb import (
|
|
|
21
20
|
RedisUploader,
|
|
22
21
|
RedisUploaderConfig,
|
|
23
22
|
)
|
|
23
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
24
24
|
|
|
25
25
|
|
|
26
26
|
async def delete_record(client: Redis, element_id: str, key_prefix: str) -> None:
|
|
@@ -18,7 +18,6 @@ from test.integration.connectors.utils.validation.source import (
|
|
|
18
18
|
)
|
|
19
19
|
from test.integration.utils import requires_env
|
|
20
20
|
from unstructured_ingest.v2.errors import UserAuthError, UserError
|
|
21
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
22
21
|
from unstructured_ingest.v2.processes.connectors.fsspec.s3 import (
|
|
23
22
|
CONNECTOR_TYPE,
|
|
24
23
|
S3AccessConfig,
|
|
@@ -30,6 +29,7 @@ from unstructured_ingest.v2.processes.connectors.fsspec.s3 import (
|
|
|
30
29
|
S3Uploader,
|
|
31
30
|
S3UploaderConfig,
|
|
32
31
|
)
|
|
32
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
33
33
|
|
|
34
34
|
|
|
35
35
|
def validate_predownload_file_data(file_data: FileData):
|
|
@@ -11,7 +11,6 @@ import requests
|
|
|
11
11
|
|
|
12
12
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, NOSQL_TAG
|
|
13
13
|
from test.integration.utils import requires_env
|
|
14
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
15
14
|
from unstructured_ingest.v2.logger import logger
|
|
16
15
|
from unstructured_ingest.v2.processes.connectors.vectara import (
|
|
17
16
|
CONNECTOR_TYPE as VECTARA_CONNECTOR_TYPE,
|
|
@@ -24,6 +23,7 @@ from unstructured_ingest.v2.processes.connectors.vectara import (
|
|
|
24
23
|
VectaraUploadStager,
|
|
25
24
|
VectaraUploadStagerConfig,
|
|
26
25
|
)
|
|
26
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
27
27
|
|
|
28
28
|
|
|
29
29
|
def validate_upload(document: dict, expected_data: dict):
|
|
@@ -4,7 +4,8 @@ from pathlib import Path
|
|
|
4
4
|
|
|
5
5
|
from test.integration.connectors.utils.validation.utils import ValidationConfig
|
|
6
6
|
from unstructured_ingest.utils.data_prep import get_data
|
|
7
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers, UploadStager
|
|
7
|
+
from unstructured_ingest.v2.interfaces import UploadStager
|
|
8
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
8
9
|
|
|
9
10
|
|
|
10
11
|
class StagerValidationConfigs(ValidationConfig):
|
|
@@ -8,7 +8,8 @@ from deepdiff import DeepDiff
|
|
|
8
8
|
from pydantic import Field
|
|
9
9
|
|
|
10
10
|
from test.integration.connectors.utils.validation.utils import ValidationConfig
|
|
11
|
-
from unstructured_ingest.v2.interfaces import Downloader, FileData, Indexer
|
|
11
|
+
from unstructured_ingest.v2.interfaces import Downloader, Indexer
|
|
12
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
12
13
|
|
|
13
14
|
NONSTANDARD_METADATA_FIELDS = {
|
|
14
15
|
"additional_metadata.@microsoft.graph.downloadUrl": [
|
|
@@ -9,7 +9,6 @@ from weaviate.client import WeaviateClient
|
|
|
9
9
|
|
|
10
10
|
from test.integration.connectors.utils.constants import DESTINATION_TAG, VECTOR_DB_TAG
|
|
11
11
|
from test.integration.connectors.utils.docker import container_context
|
|
12
|
-
from unstructured_ingest.v2.interfaces import FileData, SourceIdentifiers
|
|
13
12
|
from unstructured_ingest.v2.processes.connectors.weaviate.local import (
|
|
14
13
|
CONNECTOR_TYPE,
|
|
15
14
|
LocalWeaviateConnectionConfig,
|
|
@@ -17,6 +16,7 @@ from unstructured_ingest.v2.processes.connectors.weaviate.local import (
|
|
|
17
16
|
LocalWeaviateUploaderConfig,
|
|
18
17
|
LocalWeaviateUploadStager,
|
|
19
18
|
)
|
|
19
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
20
20
|
|
|
21
21
|
COLLECTION_NAME = "elements"
|
|
22
22
|
|
test/unit/test_html.py
CHANGED
|
@@ -5,7 +5,7 @@ from bs4 import BeautifulSoup
|
|
|
5
5
|
from pytest_mock import MockerFixture
|
|
6
6
|
|
|
7
7
|
from unstructured_ingest.utils.html import HtmlMixin
|
|
8
|
-
from unstructured_ingest.v2.
|
|
8
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
def test_extract_images(mocker: MockerFixture):
|
|
@@ -8,7 +8,6 @@ from pyiceberg.exceptions import CommitFailedException
|
|
|
8
8
|
from pytest_mock import MockerFixture
|
|
9
9
|
|
|
10
10
|
from unstructured_ingest.v2.errors import ProviderError, UserError
|
|
11
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
12
11
|
from unstructured_ingest.v2.processes.connectors.ibm_watsonx import IBM_WATSONX_S3_CONNECTOR_TYPE
|
|
13
12
|
from unstructured_ingest.v2.processes.connectors.ibm_watsonx.ibm_watsonx_s3 import (
|
|
14
13
|
IbmWatsonxAccessConfig,
|
|
@@ -16,6 +15,7 @@ from unstructured_ingest.v2.processes.connectors.ibm_watsonx.ibm_watsonx_s3 impo
|
|
|
16
15
|
IbmWatsonxUploader,
|
|
17
16
|
IbmWatsonxUploaderConfig,
|
|
18
17
|
)
|
|
18
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
@pytest.fixture
|
|
@@ -3,10 +3,9 @@ from pathlib import Path
|
|
|
3
3
|
import pytest
|
|
4
4
|
from pytest_mock import MockerFixture
|
|
5
5
|
|
|
6
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
7
|
-
from unstructured_ingest.v2.interfaces.file_data import SourceIdentifiers
|
|
8
6
|
from unstructured_ingest.v2.interfaces.upload_stager import UploadStagerConfig
|
|
9
7
|
from unstructured_ingest.v2.processes.connectors.duckdb.base import BaseDuckDBUploadStager
|
|
8
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
10
9
|
|
|
11
10
|
|
|
12
11
|
@pytest.fixture
|
|
@@ -4,13 +4,13 @@ import pandas as pd
|
|
|
4
4
|
import pytest
|
|
5
5
|
from pytest_mock import MockerFixture
|
|
6
6
|
|
|
7
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData, SourceIdentifiers
|
|
8
7
|
from unstructured_ingest.v2.processes.connectors.sql.sql import (
|
|
9
8
|
SQLConnectionConfig,
|
|
10
9
|
SQLUploader,
|
|
11
10
|
SQLUploaderConfig,
|
|
12
11
|
SQLUploadStager,
|
|
13
12
|
)
|
|
13
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
@pytest.fixture
|
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.25" # pragma: no cover
|
|
1
|
+
__version__ = "0.6.0" # pragma: no cover
|
|
@@ -7,8 +7,9 @@ from uuid import NAMESPACE_DNS, uuid5
|
|
|
7
7
|
from pydantic import BaseModel, Field
|
|
8
8
|
|
|
9
9
|
from unstructured_ingest.utils.dep_check import requires_dependencies
|
|
10
|
-
from unstructured_ingest.v2.interfaces import DownloadResponse, FileData, SourceIdentifiers
|
|
10
|
+
from unstructured_ingest.v2.interfaces import DownloadResponse
|
|
11
11
|
from unstructured_ingest.v2.logger import logger
|
|
12
|
+
from unstructured_ingest.v2.types.file_data import FileData, SourceIdentifiers
|
|
12
13
|
|
|
13
14
|
if TYPE_CHECKING:
|
|
14
15
|
from bs4.element import Tag
|
|
@@ -1,11 +1,3 @@
|
|
|
1
|
-
from unstructured_ingest.v2.types.file_data import (
|
|
2
|
-
BatchFileData,
|
|
3
|
-
BatchItem,
|
|
4
|
-
FileData,
|
|
5
|
-
FileDataSourceMetadata,
|
|
6
|
-
SourceIdentifiers,
|
|
7
|
-
)
|
|
8
|
-
|
|
9
1
|
from .connector import AccessConfig, BaseConnector, ConnectionConfig
|
|
10
2
|
from .downloader import Downloader, DownloaderConfig, DownloadResponse, download_responses
|
|
11
3
|
from .indexer import Indexer, IndexerConfig
|
|
@@ -19,7 +11,6 @@ __all__ = [
|
|
|
19
11
|
"download_responses",
|
|
20
12
|
"Downloader",
|
|
21
13
|
"DownloaderConfig",
|
|
22
|
-
"FileData",
|
|
23
14
|
"Indexer",
|
|
24
15
|
"IndexerConfig",
|
|
25
16
|
"BaseProcess",
|
|
@@ -28,13 +19,9 @@ __all__ = [
|
|
|
28
19
|
"UploadStagerConfig",
|
|
29
20
|
"Uploader",
|
|
30
21
|
"UploaderConfig",
|
|
31
|
-
"SourceIdentifiers",
|
|
32
22
|
"UploadContent",
|
|
33
23
|
"AccessConfig",
|
|
34
24
|
"ConnectionConfig",
|
|
35
25
|
"BaseConnector",
|
|
36
|
-
"FileDataSourceMetadata",
|
|
37
|
-
"BatchFileData",
|
|
38
|
-
"BatchItem",
|
|
39
26
|
"VectorDBUploader",
|
|
40
27
|
]
|
|
@@ -6,8 +6,8 @@ from typing import Any, Optional, TypedDict, TypeVar, Union
|
|
|
6
6
|
from pydantic import BaseModel, Field
|
|
7
7
|
|
|
8
8
|
from unstructured_ingest.v2.interfaces.connector import BaseConnector
|
|
9
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData
|
|
10
9
|
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
10
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
11
11
|
|
|
12
12
|
|
|
13
13
|
class DownloaderConfig(BaseModel):
|
|
@@ -4,8 +4,8 @@ from typing import Any, AsyncGenerator, Generator, Optional, TypeVar
|
|
|
4
4
|
from pydantic import BaseModel
|
|
5
5
|
|
|
6
6
|
from unstructured_ingest.v2.interfaces.connector import BaseConnector
|
|
7
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData
|
|
8
7
|
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
8
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
9
9
|
|
|
10
10
|
|
|
11
11
|
class IndexerConfig(BaseModel):
|
|
@@ -7,8 +7,8 @@ from pydantic import BaseModel
|
|
|
7
7
|
|
|
8
8
|
from unstructured_ingest.utils import ndjson
|
|
9
9
|
from unstructured_ingest.utils.data_prep import get_data, write_data
|
|
10
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData
|
|
11
|
-
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
10
|
+
from unstructured_ingest.v2.interfaces import BaseProcess
|
|
11
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
12
12
|
|
|
13
13
|
|
|
14
14
|
class UploadStagerConfig(BaseModel):
|
|
@@ -6,9 +6,8 @@ from typing import Any, TypeVar
|
|
|
6
6
|
from pydantic import BaseModel
|
|
7
7
|
|
|
8
8
|
from unstructured_ingest.utils.data_prep import get_data
|
|
9
|
-
from unstructured_ingest.v2.interfaces.connector import BaseConnector
|
|
10
|
-
from unstructured_ingest.v2.interfaces.file_data import FileData
|
|
11
|
-
from unstructured_ingest.v2.interfaces.process import BaseProcess
|
|
9
|
+
from unstructured_ingest.v2.interfaces import BaseConnector, BaseProcess
|
|
10
|
+
from unstructured_ingest.v2.types.file_data import FileData
|
|
12
11
|
|
|
13
12
|
|
|
14
13
|
class UploaderConfig(BaseModel):
|
|
@@ -5,11 +5,10 @@ from pathlib import Path
|
|
|
5
5
|
from typing import Callable, Optional, TypedDict
|
|
6
6
|
|
|
7
7
|
from unstructured_ingest.utils.data_prep import write_data
|
|
8
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
9
|
-
from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
|
|
10
8
|
from unstructured_ingest.v2.logger import logger
|
|
11
9
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
12
10
|
from unstructured_ingest.v2.processes.chunker import Chunker
|
|
11
|
+
from unstructured_ingest.v2.types.file_data import FileData, file_data_from_file
|
|
13
12
|
from unstructured_ingest.v2.utils import serialize_base_model_json
|
|
14
13
|
|
|
15
14
|
STEP_ID = "chunk"
|
|
@@ -6,11 +6,10 @@ from dataclasses import dataclass
|
|
|
6
6
|
from pathlib import Path
|
|
7
7
|
from typing import Callable, Optional, TypedDict, TypeVar
|
|
8
8
|
|
|
9
|
-
from unstructured_ingest.v2.interfaces import FileData, download_responses
|
|
10
|
-
from unstructured_ingest.v2.interfaces.downloader import Downloader
|
|
11
|
-
from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
|
|
9
|
+
from unstructured_ingest.v2.interfaces import Downloader, download_responses
|
|
12
10
|
from unstructured_ingest.v2.logger import logger
|
|
13
11
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
12
|
+
from unstructured_ingest.v2.types.file_data import FileData, file_data_from_file
|
|
14
13
|
from unstructured_ingest.v2.utils import serialize_base_model_json
|
|
15
14
|
|
|
16
15
|
DownloaderT = TypeVar("DownloaderT", bound=Downloader)
|
|
@@ -5,11 +5,10 @@ from pathlib import Path
|
|
|
5
5
|
from typing import Callable, Optional, TypedDict
|
|
6
6
|
|
|
7
7
|
from unstructured_ingest.utils.data_prep import write_data
|
|
8
|
-
from unstructured_ingest.v2.interfaces import FileData
|
|
9
|
-
from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
|
|
10
8
|
from unstructured_ingest.v2.logger import logger
|
|
11
9
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
12
10
|
from unstructured_ingest.v2.processes.embedder import Embedder
|
|
11
|
+
from unstructured_ingest.v2.types.file_data import FileData, file_data_from_file
|
|
13
12
|
from unstructured_ingest.v2.utils import serialize_base_model_json
|
|
14
13
|
|
|
15
14
|
STEP_ID = "embed"
|
|
@@ -2,10 +2,10 @@ import asyncio
|
|
|
2
2
|
from dataclasses import dataclass
|
|
3
3
|
from typing import Callable, Optional
|
|
4
4
|
|
|
5
|
-
from unstructured_ingest.v2.interfaces.file_data import file_data_from_file
|
|
6
5
|
from unstructured_ingest.v2.logger import logger
|
|
7
6
|
from unstructured_ingest.v2.pipeline.interfaces import PipelineStep
|
|
8
7
|
from unstructured_ingest.v2.processes.filter import Filterer
|
|
8
|
+
from unstructured_ingest.v2.types.file_data import file_data_from_file
|
|
9
9
|
|
|
10
10
|
STEP_ID = "filter"
|
|
11
11
|
|