unstructured-ingest 0.0.17__py3-none-any.whl → 0.0.19__py3-none-any.whl

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest has been flagged as potentially problematic.

Files changed (22)
  1. unstructured_ingest/__version__.py +1 -1
  2. unstructured_ingest/v2/processes/connectors/chroma.py +2 -5
  3. unstructured_ingest/v2/processes/connectors/databricks_volumes.py +2 -7
  4. unstructured_ingest/v2/processes/connectors/fsspec/azure.py +1 -6
  5. unstructured_ingest/v2/processes/connectors/fsspec/box.py +1 -6
  6. unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +2 -5
  7. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +9 -3
  8. unstructured_ingest/v2/processes/connectors/fsspec/gcs.py +1 -6
  9. unstructured_ingest/v2/processes/connectors/fsspec/s3.py +1 -6
  10. unstructured_ingest/v2/processes/connectors/kdbai.py +2 -5
  11. unstructured_ingest/v2/processes/connectors/local.py +2 -5
  12. unstructured_ingest/v2/processes/connectors/milvus.py +2 -5
  13. unstructured_ingest/v2/processes/connectors/mongodb.py +2 -5
  14. unstructured_ingest/v2/processes/connectors/pinecone.py +2 -5
  15. unstructured_ingest/v2/processes/connectors/sql.py +1 -6
  16. unstructured_ingest/v2/processes/connectors/weaviate.py +2 -5
  17. {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/METADATA +321 -321
  18. {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/RECORD +22 -22
  19. {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/LICENSE.md +0 -0
  20. {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/WHEEL +0 -0
  21. {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/entry_points.txt +0 -0
  22. {unstructured_ingest-0.0.17.dist-info → unstructured_ingest-0.0.19.dist-info}/top_level.txt +0 -0
@@ -1 +1 @@
1
- __version__ = "0.0.17" # pragma: no cover
1
+ __version__ = "0.0.19" # pragma: no cover
@@ -41,13 +41,10 @@ class ChromaAccessConfig(AccessConfig):
41
41
  )
42
42
 
43
43
 
44
- SecretChromaAccessConfig = Secret[ChromaAccessConfig]
45
-
46
-
47
44
  class ChromaConnectionConfig(ConnectionConfig):
48
45
  collection_name: str = Field(description="The name of the Chroma collection to write into.")
49
- access_config: SecretChromaAccessConfig = Field(
50
- default=SecretChromaAccessConfig(secret_value=ChromaAccessConfig())
46
+ access_config: Secret[ChromaAccessConfig] = Field(
47
+ default=ChromaAccessConfig(), validate_default=True
51
48
  )
52
49
  path: Optional[str] = Field(
53
50
  default=None, description="Location where Chroma is persisted, if not connecting via http."
@@ -81,14 +81,9 @@ class DatabricksVolumesAccessConfig(AccessConfig):
81
81
  google_service_account: Optional[str] = None
82
82
 
83
83
 
84
- SecretDatabricksVolumesAccessConfig = Secret[DatabricksVolumesAccessConfig]
85
-
86
-
87
84
  class DatabricksVolumesConnectionConfig(ConnectionConfig):
88
- access_config: SecretDatabricksVolumesAccessConfig = Field(
89
- default_factory=lambda: SecretDatabricksVolumesAccessConfig(
90
- secret_value=DatabricksVolumesAccessConfig()
91
- )
85
+ access_config: Secret[DatabricksVolumesAccessConfig] = Field(
86
+ default=DatabricksVolumesAccessConfig(), validate_default=True
92
87
  )
93
88
  host: Optional[str] = Field(
94
89
  default=None,
@@ -76,14 +76,9 @@ class AzureAccessConfig(FsspecAccessConfig):
76
76
  raise ValueError("either connection_string or account_name must be set")
77
77
 
78
78
 
79
- SecretAzureAccessConfig = Secret[AzureAccessConfig]
80
-
81
-
82
79
  class AzureConnectionConfig(FsspecConnectionConfig):
83
80
  supported_protocols: list[str] = field(default_factory=lambda: ["az"], init=False)
84
- access_config: SecretAzureAccessConfig = Field(
85
- default_factory=lambda: SecretAzureAccessConfig(secret_value=AzureAccessConfig())
86
- )
81
+ access_config: Secret[AzureAccessConfig]
87
82
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
88
83
 
89
84
  def get_access_config(self) -> dict[str, Any]:
@@ -36,14 +36,9 @@ class BoxAccessConfig(FsspecAccessConfig):
36
36
  )
37
37
 
38
38
 
39
- SecretBoxAccessConfig = Secret[BoxAccessConfig]
40
-
41
-
42
39
  class BoxConnectionConfig(FsspecConnectionConfig):
43
40
  supported_protocols: list[str] = field(default_factory=lambda: ["box"], init=False)
44
- access_config: SecretBoxAccessConfig = Field(
45
- default_factory=lambda: SecretBoxAccessConfig(secret_value=BoxAccessConfig())
46
- )
41
+ access_config: Secret[BoxAccessConfig] = Field(default=BoxAccessConfig(), validate_default=True)
47
42
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
48
43
 
49
44
  def get_access_config(self) -> dict[str, Any]:
@@ -35,13 +35,10 @@ class DropboxAccessConfig(FsspecAccessConfig):
35
35
  token: Optional[str] = Field(default=None, description="Dropbox access token.")
36
36
 
37
37
 
38
- SecretDropboxAccessConfig = Secret[DropboxAccessConfig]
39
-
40
-
41
38
  class DropboxConnectionConfig(FsspecConnectionConfig):
42
39
  supported_protocols: list[str] = field(default_factory=lambda: ["dropbox"], init=False)
43
- access_config: SecretDropboxAccessConfig = Field(
44
- default_factory=lambda: SecretDropboxAccessConfig(secret_value=DropboxAccessConfig())
40
+ access_config: Secret[DropboxAccessConfig] = Field(
41
+ default=DropboxAccessConfig(), validate_default=True
45
42
  )
46
43
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
47
44
 
@@ -102,7 +102,13 @@ class FsspecIndexer(Indexer):
102
102
  fs = get_filesystem_class(self.index_config.protocol)(
103
103
  **self.connection_config.get_access_config(),
104
104
  )
105
- fs.ls(path=self.index_config.path_without_protocol, detail=False)
105
+ files = fs.ls(path=self.index_config.path_without_protocol, detail=True)
106
+ valid_files = [x.get("name") for x in files if x.get("type") == "file"]
107
+ if not valid_files:
108
+ return
109
+ file_to_sample = valid_files[0]
110
+ logger.debug(f"attempting to make HEAD request for file: {file_to_sample}")
111
+ self.fs.head(path=file_to_sample)
106
112
  except Exception as e:
107
113
  logger.error(f"failed to validate connection: {e}", exc_info=True)
108
114
  raise SourceConnectionError(f"failed to validate connection: {e}")
@@ -299,8 +305,8 @@ class FsspecUploader(Uploader):
299
305
  fs = get_filesystem_class(self.upload_config.protocol)(
300
306
  **self.connection_config.get_access_config(),
301
307
  )
302
- root_dir = self.upload_config.path_without_protocol.split("/")[0]
303
- fs.ls(path=root_dir, detail=False)
308
+ upload_path = Path(self.upload_config.path_without_protocol) / "_empty"
309
+ fs.write_bytes(path=str(upload_path), value=b"")
304
310
  except Exception as e:
305
311
  logger.error(f"failed to validate connection: {e}", exc_info=True)
306
312
  raise DestinationConnectionError(f"failed to validate connection: {e}")
@@ -86,14 +86,9 @@ class GcsAccessConfig(FsspecAccessConfig):
86
86
  raise ValueError("Invalid auth token value")
87
87
 
88
88
 
89
- SecretGcsAccessConfig = Secret[GcsAccessConfig]
90
-
91
-
92
89
  class GcsConnectionConfig(FsspecConnectionConfig):
93
90
  supported_protocols: list[str] = field(default_factory=lambda: ["gs", "gcs"], init=False)
94
- access_config: SecretGcsAccessConfig = Field(
95
- default_factory=lambda: SecretGcsAccessConfig(secret_value=GcsAccessConfig())
96
- )
91
+ access_config: Secret[GcsAccessConfig] = Field(default=GcsAccessConfig(), validate_default=True)
97
92
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
98
93
 
99
94
 
@@ -49,14 +49,9 @@ class S3AccessConfig(FsspecAccessConfig):
49
49
  )
50
50
 
51
51
 
52
- SecretS3AccessConfig = Secret[S3AccessConfig]
53
-
54
-
55
52
  class S3ConnectionConfig(FsspecConnectionConfig):
56
53
  supported_protocols: list[str] = field(default_factory=lambda: ["s3", "s3a"], init=False)
57
- access_config: SecretS3AccessConfig = Field(
58
- default_factory=lambda: SecretS3AccessConfig(secret_value=S3AccessConfig())
59
- )
54
+ access_config: Secret[S3AccessConfig] = Field(default=S3AccessConfig(), validate_default=True)
60
55
  endpoint_url: Optional[str] = Field(
61
56
  default=None,
62
57
  description="Use this endpoint_url, if specified. Needed for "
@@ -39,12 +39,9 @@ class KdbaiAccessConfig(AccessConfig):
39
39
  )
40
40
 
41
41
 
42
- SecretKdbaiAccessConfig = Secret[KdbaiAccessConfig]
43
-
44
-
45
42
  class KdbaiConnectionConfig(ConnectionConfig):
46
- access_config: SecretKdbaiAccessConfig = Field(
47
- default=SecretKdbaiAccessConfig(secret_value=KdbaiAccessConfig())
43
+ access_config: Secret[KdbaiAccessConfig] = Field(
44
+ default=KdbaiAccessConfig(), validate_default=True
48
45
  )
49
46
  endpoint: str = Field(
50
47
  default="http://localhost:8082", description="Endpoint url where KDBAI is hosted."
@@ -34,12 +34,9 @@ class LocalAccessConfig(AccessConfig):
34
34
  pass
35
35
 
36
36
 
37
- SecretLocalAccessConfig = Secret[LocalAccessConfig]
38
-
39
-
40
37
  class LocalConnectionConfig(ConnectionConfig):
41
- access_config: SecretLocalAccessConfig = Field(
42
- default_factory=lambda: SecretLocalAccessConfig(secret_value=LocalAccessConfig())
38
+ access_config: Secret[LocalAccessConfig] = Field(
39
+ default=LocalAccessConfig(), validate_default=True
43
40
  )
44
41
 
45
42
 
@@ -36,12 +36,9 @@ class MilvusAccessConfig(AccessConfig):
36
36
  token: Optional[str] = Field(default=None, description="Milvus access token")
37
37
 
38
38
 
39
- SecretMilvusAccessConfig = Secret[MilvusAccessConfig]
40
-
41
-
42
39
  class MilvusConnectionConfig(ConnectionConfig):
43
- access_config: SecretMilvusAccessConfig = Field(
44
- default_factory=lambda: SecretMilvusAccessConfig(secret_value=MilvusAccessConfig())
40
+ access_config: Secret[MilvusAccessConfig] = Field(
41
+ default=MilvusAccessConfig(), validate_default=True
45
42
  )
46
43
  uri: Optional[str] = Field(
47
44
  default=None, description="Milvus uri", examples=["http://localhost:19530"]
@@ -34,12 +34,9 @@ class MongoDBAccessConfig(AccessConfig):
34
34
  uri: Optional[str] = Field(default=None, description="URI to user when connecting")
35
35
 
36
36
 
37
- SecretMongoDBAccessConfig = Secret[MongoDBAccessConfig]
38
-
39
-
40
37
  class MongoDBConnectionConfig(ConnectionConfig):
41
- access_config: SecretMongoDBAccessConfig = Field(
42
- default_factory=lambda: SecretMongoDBAccessConfig(secret_value=MongoDBAccessConfig())
38
+ access_config: Secret[MongoDBAccessConfig] = Field(
39
+ default=MongoDBAccessConfig(), validate_default=True
43
40
  )
44
41
  host: Optional[str] = Field(
45
42
  default=None,
@@ -36,13 +36,10 @@ class PineconeAccessConfig(AccessConfig):
36
36
  )
37
37
 
38
38
 
39
- SecretPineconeAccessConfig = Secret[PineconeAccessConfig]
40
-
41
-
42
39
  class PineconeConnectionConfig(ConnectionConfig):
43
40
  index_name: str = Field(description="Name of the index to connect to.")
44
- access_config: SecretPineconeAccessConfig = Field(
45
- default_factory=lambda: SecretPineconeAccessConfig(secret_value=PineconeAccessConfig())
41
+ access_config: Secret[PineconeAccessConfig] = Field(
42
+ default=PineconeAccessConfig(), validate_default=True
46
43
  )
47
44
 
48
45
  @requires_dependencies(["pinecone"], extras="pinecone")
@@ -40,9 +40,6 @@ class SQLAccessConfig(AccessConfig):
40
40
  password: Optional[str] = Field(default=None, description="DB password")
41
41
 
42
42
 
43
- SecreteSQLAccessConfig = Secret[SQLAccessConfig]
44
-
45
-
46
43
  class SQLConnectionConfig(ConnectionConfig):
47
44
  db_type: Literal["sqlite", "postgresql"] = Field(
48
45
  default=SQLITE_DB, description="Type of the database backend"
@@ -53,9 +50,7 @@ class SQLConnectionConfig(ConnectionConfig):
53
50
  )
54
51
  host: Optional[str] = Field(default=None, description="DB host")
55
52
  port: Optional[int] = Field(default=5432, description="DB host connection port")
56
- access_config: SecreteSQLAccessConfig = Field(
57
- default_factory=lambda: SecreteSQLAccessConfig(secret_value=SQLAccessConfig())
58
- )
53
+ access_config: Secret[SQLAccessConfig] = Field(default=SQLAccessConfig(), validate_default=True)
59
54
  connector_type: str = Field(default=CONNECTOR_TYPE, init=False)
60
55
 
61
56
  def __post_init__(self):
@@ -38,16 +38,13 @@ class WeaviateAccessConfig(AccessConfig):
38
38
  password: Optional[str] = None
39
39
 
40
40
 
41
- SecretWeaviateAccessConfig = Secret[WeaviateAccessConfig]
42
-
43
-
44
41
  class WeaviateConnectionConfig(ConnectionConfig):
45
42
  host_url: str = Field(description="Weaviate instance url")
46
43
  class_name: str = Field(
47
44
  description="Name of the class to push the records into, e.g: Pdf-elements"
48
45
  )
49
- access_config: SecretWeaviateAccessConfig = Field(
50
- default_factory=lambda: SecretWeaviateAccessConfig(secret_value=WeaviateAccessConfig())
46
+ access_config: Secret[WeaviateAccessConfig] = Field(
47
+ default=WeaviateAccessConfig(), validate_default=True
51
48
  )
52
49
  username: Optional[str] = None
53
50
  anonymous: bool = Field(default=False, description="if set, all auth values will be ignored")