unstructured-ingest 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/v2/processes/connectors/chroma.py +2 -5
- unstructured_ingest/v2/processes/connectors/databricks_volumes.py +2 -7
- unstructured_ingest/v2/processes/connectors/fsspec/azure.py +1 -6
- unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -6
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +2 -5
- unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +9 -3
- unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +1 -6
- unstructured_ingest/v2/processes/connectors/fsspec/s3.py +1 -6
- unstructured_ingest/v2/processes/connectors/kdbai.py +2 -5
- unstructured_ingest/v2/processes/connectors/local.py +2 -5
- unstructured_ingest/v2/processes/connectors/milvus.py +2 -5
- unstructured_ingest/v2/processes/connectors/mongodb.py +2 -5
- unstructured_ingest/v2/processes/connectors/pinecone.py +2 -5
- unstructured_ingest/v2/processes/connectors/sql.py +1 -6
- unstructured_ingest/v2/processes/connectors/weaviate.py +2 -5
- {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/METADATA +321 -321
- {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/RECORD +22 -22
- {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/top_level.txt +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.0.17" # pragma: no cover
|
|
1
|
+
__version__ = "0.0.19" # pragma: no cover
|
|
@@ -41,13 +41,10 @@ class ChromaAccessConfig(AccessConfig):
|
|
|
41
41
|
)
|
|
42
42
|
|
|
43
43
|
|
|
44
|
-
SecretChromaAccessConfig = Secret[ChromaAccessConfig]
|
|
45
|
-
|
|
46
|
-
|
|
47
44
|
class ChromaConnectionConfig(ConnectionConfig):
|
|
48
45
|
collection_name: str = Field(description="The name of the Chroma collection to write into.")
|
|
49
|
-
access_config:
|
|
50
|
-
default=
|
|
46
|
+
access_config: Secret[ChromaAccessConfig] = Field(
|
|
47
|
+
default=ChromaAccessConfig(), validate_default=True
|
|
51
48
|
)
|
|
52
49
|
path: Optional[str] = Field(
|
|
53
50
|
default=None, description="Location where Chroma is persisted, if not connecting via http."
|
|
@@ -81,14 +81,9 @@ class DatabricksVolumesAccessConfig(AccessConfig):
|
|
|
81
81
|
google_service_account: Optional[str] = None
|
|
82
82
|
|
|
83
83
|
|
|
84
|
-
SecretDatabricksVolumesAccessConfig = Secret[DatabricksVolumesAccessConfig]
|
|
85
|
-
|
|
86
|
-
|
|
87
84
|
class DatabricksVolumesConnectionConfig(ConnectionConfig):
|
|
88
|
-
access_config:
|
|
89
|
-
|
|
90
|
-
secret_value=DatabricksVolumesAccessConfig()
|
|
91
|
-
)
|
|
85
|
+
access_config: Secret[DatabricksVolumesAccessConfig] = Field(
|
|
86
|
+
default=DatabricksVolumesAccessConfig(), validate_default=True
|
|
92
87
|
)
|
|
93
88
|
host: Optional[str] = Field(
|
|
94
89
|
default=None,
|
|
@@ -76,14 +76,9 @@ class AzureAccessConfig(FsspecAccessConfig):
|
|
|
76
76
|
raise ValueError("either connection_string or account_name must be set")
|
|
77
77
|
|
|
78
78
|
|
|
79
|
-
SecretAzureAccessConfig = Secret[AzureAccessConfig]
|
|
80
|
-
|
|
81
|
-
|
|
82
79
|
class AzureConnectionConfig(FsspecConnectionConfig):
|
|
83
80
|
supported_protocols: list[str] = field(default_factory=lambda: ["az"], init=False)
|
|
84
|
-
access_config: SecretAzureAccessConfig = Field(
|
|
85
|
-
default_factory=lambda: SecretAzureAccessConfig(secret_value=AzureAccessConfig())
|
|
86
|
-
)
|
|
81
|
+
access_config: Secret[AzureAccessConfig]
|
|
87
82
|
connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
|
|
88
83
|
|
|
89
84
|
def get_access_config(self) -> dict[str, Any]:
|
|
@@ -36,14 +36,9 @@ class BoxAccessConfig(FsspecAccessConfig):
|
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
SecretBoxAccessConfig = Secret[BoxAccessConfig]
|
|
40
|
-
|
|
41
|
-
|
|
42
39
|
class BoxConnectionConfig(FsspecConnectionConfig):
|
|
43
40
|
supported_protocols: list[str] = field(default_factory=lambda: ["box"], init=False)
|
|
44
|
-
access_config: SecretBoxAccessConfig = Field(
|
|
45
|
-
default_factory=lambda: SecretBoxAccessConfig(secret_value=BoxAccessConfig())
|
|
46
|
-
)
|
|
41
|
+
access_config: Secret[BoxAccessConfig] = Field(default=BoxAccessConfig(), validate_default=True)
|
|
47
42
|
connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
|
|
48
43
|
|
|
49
44
|
def get_access_config(self) -> dict[str, Any]:
|
|
@@ -35,13 +35,10 @@ class DropboxAccessConfig(FsspecAccessConfig):
|
|
|
35
35
|
token: Optional[str] = Field(default=None, description="Dropbox access token.")
|
|
36
36
|
|
|
37
37
|
|
|
38
|
-
SecretDropboxAccessConfig = Secret[DropboxAccessConfig]
|
|
39
|
-
|
|
40
|
-
|
|
41
38
|
class DropboxConnectionConfig(FsspecConnectionConfig):
|
|
42
39
|
supported_protocols: list[str] = field(default_factory=lambda: ["dropbox"], init=False)
|
|
43
|
-
access_config:
|
|
44
|
-
|
|
40
|
+
access_config: Secret[DropboxAccessConfig] = Field(
|
|
41
|
+
default=DropboxAccessConfig(), validate_default=True
|
|
45
42
|
)
|
|
46
43
|
connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
|
|
47
44
|
|
|
@@ -102,7 +102,13 @@ class FsspecIndexer(Indexer):
|
|
|
102
102
|
fs = get_filesystem_class(self.index_config.protocol)(
|
|
103
103
|
**self.connection_config.get_access_config(),
|
|
104
104
|
)
|
|
105
|
-
fs.ls(path=self.index_config.path_without_protocol, detail=False)
|
|
105
|
+
files = fs.ls(path=self.index_config.path_without_protocol, detail=True)
|
|
106
|
+
valid_files = [x.get("name") for x in files if x.get("type") == "file"]
|
|
107
|
+
if not valid_files:
|
|
108
|
+
return
|
|
109
|
+
file_to_sample = valid_files[0]
|
|
110
|
+
logger.debug(f"attempting to make HEAD request for file: {file_to_sample}")
|
|
111
|
+
self.fs.head(path=file_to_sample)
|
|
106
112
|
except Exception as e:
|
|
107
113
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
108
114
|
raise SourceConnectionError(f"failed to validate connection: {e}")
|
|
@@ -299,8 +305,8 @@ class FsspecUploader(Uploader):
|
|
|
299
305
|
fs = get_filesystem_class(self.upload_config.protocol)(
|
|
300
306
|
**self.connection_config.get_access_config(),
|
|
301
307
|
)
|
|
302
|
-
|
|
303
|
-
fs.
|
|
308
|
+
upload_path = Path(self.upload_config.path_without_protocol) / "_empty"
|
|
309
|
+
fs.write_bytes(path=str(upload_path), value=b"")
|
|
304
310
|
except Exception as e:
|
|
305
311
|
logger.error(f"failed to validate connection: {e}", exc_info=True)
|
|
306
312
|
raise DestinationConnectionError(f"failed to validate connection: {e}")
|
|
@@ -86,14 +86,9 @@ class GcsAccessConfig(FsspecAccessConfig):
|
|
|
86
86
|
raise ValueError("Invalid auth token value")
|
|
87
87
|
|
|
88
88
|
|
|
89
|
-
SecretGcsAccessConfig = Secret[GcsAccessConfig]
|
|
90
|
-
|
|
91
|
-
|
|
92
89
|
class GcsConnectionConfig(FsspecConnectionConfig):
|
|
93
90
|
supported_protocols: list[str] = field(default_factory=lambda: ["gs", "gcs"], init=False)
|
|
94
|
-
access_config: SecretGcsAccessConfig = Field(
|
|
95
|
-
default_factory=lambda: SecretGcsAccessConfig(secret_value=GcsAccessConfig())
|
|
96
|
-
)
|
|
91
|
+
access_config: Secret[GcsAccessConfig] = Field(default=GcsAccessConfig(), validate_default=True)
|
|
97
92
|
connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
|
|
98
93
|
|
|
99
94
|
|
|
@@ -49,14 +49,9 @@ class S3AccessConfig(FsspecAccessConfig):
|
|
|
49
49
|
)
|
|
50
50
|
|
|
51
51
|
|
|
52
|
-
SecretS3AccessConfig = Secret[S3AccessConfig]
|
|
53
|
-
|
|
54
|
-
|
|
55
52
|
class S3ConnectionConfig(FsspecConnectionConfig):
|
|
56
53
|
supported_protocols: list[str] = field(default_factory=lambda: ["s3", "s3a"], init=False)
|
|
57
|
-
access_config: SecretS3AccessConfig = Field(
|
|
58
|
-
default_factory=lambda: SecretS3AccessConfig(secret_value=S3AccessConfig())
|
|
59
|
-
)
|
|
54
|
+
access_config: Secret[S3AccessConfig] = Field(default=S3AccessConfig(), validate_default=True)
|
|
60
55
|
endpoint_url: Optional[str] = Field(
|
|
61
56
|
default=None,
|
|
62
57
|
description="Use this endpoint_url, if specified. Needed for "
|
|
@@ -39,12 +39,9 @@ class KdbaiAccessConfig(AccessConfig):
|
|
|
39
39
|
)
|
|
40
40
|
|
|
41
41
|
|
|
42
|
-
SecretKdbaiAccessConfig = Secret[KdbaiAccessConfig]
|
|
43
|
-
|
|
44
|
-
|
|
45
42
|
class KdbaiConnectionConfig(ConnectionConfig):
|
|
46
|
-
access_config:
|
|
47
|
-
default=
|
|
43
|
+
access_config: Secret[KdbaiAccessConfig] = Field(
|
|
44
|
+
default=KdbaiAccessConfig(), validate_default=True
|
|
48
45
|
)
|
|
49
46
|
endpoint: str = Field(
|
|
50
47
|
default="http://localhost:8082", description="Endpoint url where KDBAI is hosted."
|
|
@@ -34,12 +34,9 @@ class LocalAccessConfig(AccessConfig):
|
|
|
34
34
|
pass
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
SecretLocalAccessConfig = Secret[LocalAccessConfig]
|
|
38
|
-
|
|
39
|
-
|
|
40
37
|
class LocalConnectionConfig(ConnectionConfig):
|
|
41
|
-
access_config:
|
|
42
|
-
|
|
38
|
+
access_config: Secret[LocalAccessConfig] = Field(
|
|
39
|
+
default=LocalAccessConfig(), validate_default=True
|
|
43
40
|
)
|
|
44
41
|
|
|
45
42
|
|
|
@@ -36,12 +36,9 @@ class MilvusAccessConfig(AccessConfig):
|
|
|
36
36
|
token: Optional[str] = Field(default=None, description="Milvus access token")
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
SecretMilvusAccessConfig = Secret[MilvusAccessConfig]
|
|
40
|
-
|
|
41
|
-
|
|
42
39
|
class MilvusConnectionConfig(ConnectionConfig):
|
|
43
|
-
access_config:
|
|
44
|
-
|
|
40
|
+
access_config: Secret[MilvusAccessConfig] = Field(
|
|
41
|
+
default=MilvusAccessConfig(), validate_default=True
|
|
45
42
|
)
|
|
46
43
|
uri: Optional[str] = Field(
|
|
47
44
|
default=None, description="Milvus uri", examples=["http://localhost:19530"]
|
|
@@ -34,12 +34,9 @@ class MongoDBAccessConfig(AccessConfig):
|
|
|
34
34
|
uri: Optional[str] = Field(default=None, description="URI to user when connecting")
|
|
35
35
|
|
|
36
36
|
|
|
37
|
-
SecretMongoDBAccessConfig = Secret[MongoDBAccessConfig]
|
|
38
|
-
|
|
39
|
-
|
|
40
37
|
class MongoDBConnectionConfig(ConnectionConfig):
|
|
41
|
-
access_config:
|
|
42
|
-
|
|
38
|
+
access_config: Secret[MongoDBAccessConfig] = Field(
|
|
39
|
+
default=MongoDBAccessConfig(), validate_default=True
|
|
43
40
|
)
|
|
44
41
|
host: Optional[str] = Field(
|
|
45
42
|
default=None,
|
|
@@ -36,13 +36,10 @@ class PineconeAccessConfig(AccessConfig):
|
|
|
36
36
|
)
|
|
37
37
|
|
|
38
38
|
|
|
39
|
-
SecretPineconeAccessConfig = Secret[PineconeAccessConfig]
|
|
40
|
-
|
|
41
|
-
|
|
42
39
|
class PineconeConnectionConfig(ConnectionConfig):
|
|
43
40
|
index_name: str = Field(description="Name of the index to connect to.")
|
|
44
|
-
access_config:
|
|
45
|
-
|
|
41
|
+
access_config: Secret[PineconeAccessConfig] = Field(
|
|
42
|
+
default=PineconeAccessConfig(), validate_default=True
|
|
46
43
|
)
|
|
47
44
|
|
|
48
45
|
@requires_dependencies(["pinecone"], extras="pinecone")
|
|
@@ -40,9 +40,6 @@ class SQLAccessConfig(AccessConfig):
|
|
|
40
40
|
password: Optional[str] = Field(default=None, description="DB password")
|
|
41
41
|
|
|
42
42
|
|
|
43
|
-
SecreteSQLAccessConfig = Secret[SQLAccessConfig]
|
|
44
|
-
|
|
45
|
-
|
|
46
43
|
class SQLConnectionConfig(ConnectionConfig):
|
|
47
44
|
db_type: Literal["sqlite", "postgresql"] = Field(
|
|
48
45
|
default=SQLITE_DB, description="Type of the database backend"
|
|
@@ -53,9 +50,7 @@ class SQLConnectionConfig(ConnectionConfig):
|
|
|
53
50
|
)
|
|
54
51
|
host: Optional[str] = Field(default=None, description="DB host")
|
|
55
52
|
port: Optional[int] = Field(default=5432, description="DB host connection port")
|
|
56
|
-
access_config: SecreteSQLAccessConfig = Field(
|
|
57
|
-
default_factory=lambda: SecreteSQLAccessConfig(secret_value=SQLAccessConfig())
|
|
58
|
-
)
|
|
53
|
+
access_config: Secret[SQLAccessConfig] = Field(default=SQLAccessConfig(), validate_default=True)
|
|
59
54
|
connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
|
|
60
55
|
|
|
61
56
|
def __post_init__(self):
|
|
@@ -38,16 +38,13 @@ class WeaviateAccessConfig(AccessConfig):
|
|
|
38
38
|
password: Optional[str] = None
|
|
39
39
|
|
|
40
40
|
|
|
41
|
-
SecretWeaviateAccessConfig = Secret[WeaviateAccessConfig]
|
|
42
|
-
|
|
43
|
-
|
|
44
41
|
class WeaviateConnectionConfig(ConnectionConfig):
|
|
45
42
|
host_url: str = Field(description="Weaviate instance url")
|
|
46
43
|
class_name: str = Field(
|
|
47
44
|
description="Name of the class to push the records into, e.g: Pdf-elements"
|
|
48
45
|
)
|
|
49
|
-
access_config:
|
|
50
|
-
|
|
46
|
+
access_config: Secret[WeaviateAccessConfig] = Field(
|
|
47
|
+
default=WeaviateAccessConfig(), validate_default=True
|
|
51
48
|
)
|
|
52
49
|
username: Optional[str] = None
|
|
53
50
|
anonymous: bool = Field(default=False, description="if set, all auth values will be ignored")
|