unstructured-ingest 0.0.0__py3-none-any.whl → 0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

@@ -1 +1 @@
1
- __version__ = "0.0.0" # pragma: no cover
1
+ __version__ = "0.0.1" # pragma: no cover
@@ -5,7 +5,6 @@ from typing import List, Optional, Tuple
5
5
  from urllib.parse import urlparse
6
6
  from uuid import UUID
7
7
 
8
- import unstructured.ingest.connector.notion.types.blocks as notion_blocks
9
8
  from htmlBuilder.attributes import Style, Type
10
9
  from htmlBuilder.tags import (
11
10
  Body,
@@ -23,6 +22,7 @@ from htmlBuilder.tags import (
23
22
  )
24
23
  from notion_client.errors import APIResponseError
25
24
 
25
+ import unstructured_ingest.connector.notion.types.blocks as notion_blocks
26
26
  from unstructured_ingest.connector.notion.client import Client
27
27
  from unstructured_ingest.connector.notion.interfaces import BlockBase
28
28
  from unstructured_ingest.connector.notion.types.block import Block
@@ -3,7 +3,7 @@ import json
3
3
  import logging
4
4
  import typing as t
5
5
 
6
- logger = logging.getLogger("unstructured.ingest")
6
+ logger = logging.getLogger("unstructured_ingest")
7
7
 
8
8
 
9
9
  def default_is_data_sensitive(k: str, v: t.Any) -> bool:
@@ -119,7 +119,7 @@ def ingest_log_streaming_init(level: int) -> None:
119
119
 
120
120
  def make_default_logger(level: int) -> logging.Logger:
121
121
  """Return a custom logger."""
122
- logger = logging.getLogger("unstructured.ingest")
122
+ logger = logging.getLogger("unstructured_ingest")
123
123
  handler = logging.StreamHandler()
124
124
  handler.name = "ingest_log_handler"
125
125
  formatter = SensitiveFormatter("%(asctime)s %(processName)-10s %(levelname)-8s %(message)s")
@@ -15,6 +15,7 @@ from .fsspec.s3 import s3_dest_cmd, s3_src_cmd
15
15
  from .fsspec.sftp import sftp_dest_cmd, sftp_src_cmd
16
16
  from .google_drive import google_drive_src_cmd
17
17
  from .local import local_dest_cmd, local_src_cmd
18
+ from .milvus import milvus_dest_cmd
18
19
  from .mongodb import mongodb_dest_cmd
19
20
  from .onedrive import onedrive_drive_src_cmd
20
21
  from .opensearch import opensearch_dest_cmd, opensearch_src_cmd
@@ -60,6 +61,7 @@ dest_cmds = [
60
61
  elasticsearch_dest_cmd,
61
62
  gcs_dest_cmd,
62
63
  local_dest_cmd,
64
+ milvus_dest_cmd,
63
65
  opensearch_dest_cmd,
64
66
  pinecone_dest_cmd,
65
67
  s3_dest_cmd,
@@ -0,0 +1,72 @@
1
+ from dataclasses import dataclass
2
+
3
+ import click
4
+
5
+ from unstructured_ingest.v2.cli.base import DestCmd
6
+ from unstructured_ingest.v2.cli.interfaces import CliConfig
7
+ from unstructured_ingest.v2.processes.connectors.milvus import CONNECTOR_TYPE
8
+
9
+
10
@dataclass
class MilvusCliConnectionConfig(CliConfig):
    """CLI options describing how to reach a Milvus instance."""

    @staticmethod
    def get_cli_options() -> list[click.Option]:
        """Return the connection-related click options.

        Every option is optional and defaults to ``None``.

        Returns:
            The ``--uri``, ``--user``, ``--password`` and ``--db-name`` options.
        """
        options = [
            click.Option(
                ["--uri"],
                required=False,
                type=str,
                default=None,
                # The example URI was previously missing its closing quote.
                help="Milvus uri, eg 'http://localhost:19530'",
            ),
            click.Option(
                ["--user"],
                required=False,
                type=str,
                default=None,
                help="Milvus user",
            ),
            click.Option(
                ["--password"],
                required=False,
                type=str,
                default=None,
                help="Milvus password",
            ),
            click.Option(
                ["--db-name"],
                required=False,
                type=str,
                default=None,
                help="Milvus database name",
            ),
        ]
        return options
45
+
46
+
47
@dataclass
class MilvusCliUploaderConfig(CliConfig):
    """CLI options controlling how records are written to Milvus."""

    @staticmethod
    def get_cli_options() -> list[click.Option]:
        """Return the uploader-related click options.

        Returns:
            The required ``--collection-name`` option followed by the
            ``--num-of-processes`` option (integer >= 1, default 4).
        """
        collection_option = click.Option(
            ["--collection-name"],
            required=True,
            type=str,
            help="Milvus collections to write to",
        )
        processes_option = click.Option(
            ["--num-of-processes"],
            type=click.IntRange(min=1),
            default=4,
            help="number of processes to use when writing to support parallel writes",
        )
        return [collection_option, processes_option]
66
+
67
+
68
# Destination command for Milvus: named after the connector type and assembled
# from the connection and uploader CLI config classes defined in this module.
milvus_dest_cmd = DestCmd(
    uploader_config=MilvusCliUploaderConfig,
    connection_config=MilvusCliConnectionConfig,
    cmd_name=CONNECTOR_TYPE,
)
@@ -5,7 +5,7 @@ from logging import Formatter, Logger, StreamHandler, getLevelName, getLogger
5
5
  from typing import Any, Callable
6
6
 
7
7
  log_level = os.getenv("INGEST_LOG_LEVEL", "INFO")
8
- LOGGER_NAME = "unstructured.ingest.v2"
8
+ LOGGER_NAME = "unstructured_ingest.v2"
9
9
 
10
10
 
11
11
  def default_is_data_sensitive(k: str, v: Any) -> bool:
@@ -146,6 +146,8 @@ class PipelineStep(ABC):
146
146
  logger.error(f"Exception raised while running {self.identifier}", exc_info=e)
147
147
  if "file_data_path" in kwargs:
148
148
  self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)}
149
+ else:
150
+ self.context.status[self.identifier] = {"step_error": str(e)}
149
151
  if self.context.raise_on_error:
150
152
  raise e
151
153
  return None
@@ -158,6 +160,8 @@ class PipelineStep(ABC):
158
160
  logger.error(f"Exception raised while running {self.identifier}", exc_info=e)
159
161
  if "file_data_path" in kwargs:
160
162
  self.context.status[kwargs["file_data_path"]] = {self.identifier: str(e)}
163
+ else:
164
+ self.context.status[self.identifier] = {"step_error": str(e)}
161
165
  if self.context.raise_on_error:
162
166
  raise e
163
167
  return None
@@ -1,7 +1,6 @@
1
1
  from __future__ import annotations
2
2
 
3
- import unstructured.ingest.v2.processes.connectors.fsspec # noqa: F401
4
-
3
+ import unstructured_ingest.v2.processes.connectors.fsspec # noqa: F401
5
4
  from unstructured_ingest.v2.processes.connector_registry import (
6
5
  add_destination_entry,
7
6
  add_source_entry,
@@ -19,6 +18,8 @@ from .google_drive import CONNECTOR_TYPE as GOOGLE_DRIVE_CONNECTOR_TYPE
19
18
  from .google_drive import google_drive_source_entry
20
19
  from .local import CONNECTOR_TYPE as LOCAL_CONNECTOR_TYPE
21
20
  from .local import local_destination_entry, local_source_entry
21
+ from .milvus import CONNECTOR_TYPE as MILVUS_CONNECTOR_TYPE
22
+ from .milvus import milvus_destination_entry
22
23
  from .mongodb import CONNECTOR_TYPE as MONGODB_CONNECTOR_TYPE
23
24
  from .mongodb import mongodb_destination_entry
24
25
  from .onedrive import CONNECTOR_TYPE as ONEDRIVE_CONNECTOR_TYPE
@@ -75,3 +76,4 @@ add_source_entry(source_type=SHAREPOINT_CONNECTOR_TYPE, entry=sharepoint_source_
75
76
  add_destination_entry(
76
77
  destination_type=SINGLESTORE_CONNECTOR_TYPE, entry=singlestore_destination_entry
77
78
  )
79
+ add_destination_entry(destination_type=MILVUS_CONNECTOR_TYPE, entry=milvus_destination_entry)
@@ -0,0 +1,200 @@
1
+ import json
2
+ import multiprocessing as mp
3
+ from dataclasses import dataclass, field
4
+ from pathlib import Path
5
+ from typing import TYPE_CHECKING, Any, Optional, Union
6
+
7
+ import pandas as pd
8
+ from dateutil import parser
9
+
10
+ from unstructured_ingest.enhanced_dataclass import enhanced_field
11
+ from unstructured_ingest.error import WriteError
12
+ from unstructured_ingest.utils.data_prep import flatten_dict
13
+ from unstructured_ingest.utils.dep_check import requires_dependencies
14
+ from unstructured_ingest.v2.interfaces import (
15
+ AccessConfig,
16
+ ConnectionConfig,
17
+ FileData,
18
+ UploadContent,
19
+ Uploader,
20
+ UploaderConfig,
21
+ UploadStager,
22
+ UploadStagerConfig,
23
+ )
24
+ from unstructured_ingest.v2.logger import logger
25
+ from unstructured_ingest.v2.processes.connector_registry import (
26
+ DestinationRegistryEntry,
27
+ )
28
+
29
+ if TYPE_CHECKING:
30
+ from pymilvus import MilvusClient
31
+
32
+ CONNECTOR_TYPE = "milvus"
33
+
34
+
35
@dataclass
class MilvusAccessConfig(AccessConfig):
    """Credential values for Milvus; both are optional and default to ``None``."""

    # Password credential; unset by default.
    password: Optional[str] = None
    # Token credential; unset by default.
    token: Optional[str] = None
39
+
40
+
41
@dataclass
class MilvusConnectionConfig(ConnectionConfig):
    """Connection settings from which a ``pymilvus.MilvusClient`` is built."""

    # Credentials are held in a nested config and marked as sensitive.
    access_config: MilvusAccessConfig = enhanced_field(
        sensitive=True, default_factory=lambda: MilvusAccessConfig()
    )
    uri: Optional[str] = None
    user: Optional[str] = None
    db_name: Optional[str] = None

    def get_connection_kwargs(self) -> dict[str, Any]:
        """Build the keyword arguments handed to ``MilvusClient``.

        The nested access config is merged into the top-level settings and
        every entry whose value is ``None`` is dropped.

        Returns:
            A flat dict containing only the values that are not ``None``.
        """
        credentials = self.access_config.to_dict()
        settings = self.to_dict()
        # Replace the nested access config entry with its individual fields.
        settings.pop("access_config", None)
        settings.update(credentials)
        return {name: value for name, value in settings.items() if value is not None}

    @requires_dependencies(["pymilvus"], extras="milvus")
    def get_client(self) -> "MilvusClient":
        """Create a new ``MilvusClient`` from the current connection kwargs."""
        # Imported lazily so the module can be loaded without pymilvus installed.
        from pymilvus import MilvusClient

        client_kwargs = self.get_connection_kwargs()
        return MilvusClient(**client_kwargs)
64
+
65
+
66
@dataclass
class MilvusUploadStagerConfig(UploadStagerConfig):
    """Stager configuration for Milvus; defines no fields of its own."""
69
+
70
+
71
@dataclass
class MilvusUploadStager(UploadStager):
    """Rewrite an elements JSON file into flat records ready for Milvus."""

    upload_stager_config: MilvusUploadStagerConfig = field(
        default_factory=lambda: MilvusUploadStagerConfig()
    )

    @staticmethod
    def parse_date_string(date_string: str) -> float:
        """Convert a numeric or date string into a float timestamp.

        Args:
            date_string: Either something ``float()`` accepts or a date string
                that ``dateutil.parser.parse`` can interpret.

        Returns:
            ``float(date_string)`` when that succeeds, otherwise the
            ``.timestamp()`` of the parsed date.
        """
        try:
            timestamp = float(date_string)
            return timestamp
        except ValueError:
            # Not a plain number; fall through to date parsing below.
            pass
        return parser.parse(date_string).timestamp()

    @classmethod
    def conform_dict(cls, data: dict) -> None:
        """Normalise a single element dict in place.

        The ``metadata`` entry (if truthy) is flattened into the top level,
        known datetime columns are converted with ``parse_date_string``,
        selected fields are JSON-encoded and missing defaults are filled in.

        Args:
            data: Element dict to modify; nothing is returned.
        """
        datetime_columns = [
            "data_source_date_created",
            "data_source_date_modified",
            "data_source_date_processed",
            "last_modified",
        ]

        json_dumps_fields = ["languages", "data_source_permissions_data"]

        # TODO: milvus sdk doesn't seem to support defaults via the schema yet,
        #  remove once that gets updated
        defaults = {"is_continuation": False}

        if metadata := data.pop("metadata", None):
            data.update(flatten_dict(metadata, keys_to_omit=["data_source_record_locator"]))
        for datetime_column in datetime_columns:
            if datetime_column in data:
                data[datetime_column] = cls.parse_date_string(data[datetime_column])
        for json_dumps_field in json_dumps_fields:
            if json_dumps_field in data:
                data[json_dumps_field] = json.dumps(data[json_dumps_field])
        for default_key, default_value in defaults.items():
            # Only fills the value when the key is absent.
            data.setdefault(default_key, default_value)

    def run(
        self,
        elements_filepath: Path,
        file_data: FileData,
        output_dir: Path,
        output_filename: str,
        **kwargs: Any,
    ) -> Path:
        """Conform every element in a JSON file and write the result.

        Args:
            elements_filepath: JSON file containing a list of element dicts.
            file_data: Not used by this implementation.
            output_dir: Directory the staged file is written to (created if missing).
            output_filename: File name without extension; ``.json`` is appended.
            **kwargs: Ignored.

        Returns:
            Path of the written ``<output_dir>/<output_filename>.json`` file.
        """
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(elements_filepath, encoding="utf-8") as elements_file:
            elements_contents: list[dict[str, Any]] = json.load(elements_file)
        for element in elements_contents:
            self.conform_dict(data=element)

        output_path = Path(output_dir) / Path(f"{output_filename}.json")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        with output_path.open("w", encoding="utf-8") as output_file:
            json.dump(elements_contents, output_file, indent=2)
        return output_path
131
+
132
+
133
@dataclass
class MilvusUploaderConfig(UploaderConfig):
    """Uploader settings for the Milvus destination."""

    # Name of the collection that records are inserted into (required).
    collection_name: str
    # Size of the worker pool used by the uploader; 1 means upload serially.
    num_of_processes: int = 4
137
+
138
+
139
@dataclass
class MilvusUploader(Uploader):
    """Insert staged ``.json`` or ``.csv`` files into a Milvus collection."""

    connection_config: MilvusConnectionConfig
    upload_config: MilvusUploaderConfig
    connector_type: str = CONNECTOR_TYPE

    def upload(self, content: UploadContent) -> None:
        """Upload one staged file, dispatching on its file extension.

        Raises:
            ValueError: If the suffix is neither ``.json`` nor ``.csv``.
        """
        file_extension = content.path.suffix
        if file_extension == ".json":
            self.upload_json(content=content)
        elif file_extension == ".csv":
            self.upload_csv(content=content)
        else:
            raise ValueError(f"Unsupported file extension: {file_extension}")

    @requires_dependencies(["pymilvus"], extras="milvus")
    def insert_results(self, data: Union[dict, list[dict]]) -> None:
        """Insert records into the configured collection.

        Args:
            data: A single record or a list of records to insert.

        Raises:
            WriteError: If pymilvus raises ``MilvusException`` or the response
                reports a positive integer ``err_count``.
        """
        from pymilvus import MilvusException

        logger.debug(
            f"uploading {len(data)} entries to {self.connection_config.db_name} "
            f"db in collection {self.upload_config.collection_name}"
        )
        client = self.connection_config.get_client()

        try:
            res = client.insert(collection_name=self.upload_config.collection_name, data=data)
        except MilvusException as milvus_exception:
            raise WriteError("failed to upload records to milvus") from milvus_exception
        finally:
            # A fresh client is created per call; close it so connections
            # do not accumulate across uploads.
            client.close()
        if "err_count" in res and isinstance(res["err_count"], int) and res["err_count"] > 0:
            err_count = res["err_count"]
            raise WriteError(f"failed to upload {err_count} docs")

    def upload_csv(self, content: UploadContent) -> None:
        """Read a CSV file with pandas and insert its rows as records."""
        df = pd.read_csv(content.path)
        data = df.to_dict(orient="records")
        self.insert_results(data=data)

    def upload_json(self, content: UploadContent) -> None:
        """Load a JSON file and insert its contents."""
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with content.path.open("r", encoding="utf-8") as file:
            data: list[dict] = json.load(file)
        self.insert_results(data=data)

    def run(self, contents: list[UploadContent], **kwargs: Any) -> None:
        """Upload every staged file, serially or through a process pool.

        Args:
            contents: Staged files to upload.
            **kwargs: Ignored.
        """
        if self.upload_config.num_of_processes == 1:
            for content in contents:
                self.upload(content=content)

        else:
            # The bound method is sent to the worker processes by the pool.
            with mp.Pool(
                processes=self.upload_config.num_of_processes,
            ) as pool:
                pool.map(self.upload, contents)
192
+
193
+
194
# Registry entry bundling the Milvus connection config, stager and uploader
# classes together with their respective config classes.
milvus_destination_entry = DestinationRegistryEntry(
    connection_config=MilvusConnectionConfig,
    upload_stager_config=MilvusUploadStagerConfig,
    upload_stager=MilvusUploadStager,
    uploader_config=MilvusUploaderConfig,
    uploader=MilvusUploader,
)
@@ -5,10 +5,6 @@ from dataclasses import dataclass, field
5
5
  from pathlib import Path
6
6
  from typing import TYPE_CHECKING, Any, Optional
7
7
 
8
- from unstructured.ingest.v2.logger import logger
9
- from unstructured.ingest.v2.processes.connector_registry import (
10
- DestinationRegistryEntry,
11
- )
12
8
  from unstructured.staging.base import flatten_dict
13
9
  from unstructured.utils import requires_dependencies
14
10
 
@@ -24,6 +20,10 @@ from unstructured_ingest.v2.interfaces import (
24
20
  UploadStager,
25
21
  UploadStagerConfig,
26
22
  )
23
+ from unstructured_ingest.v2.logger import logger
24
+ from unstructured_ingest.v2.processes.connector_registry import (
25
+ DestinationRegistryEntry,
26
+ )
27
27
 
28
28
  if TYPE_CHECKING:
29
29
  from pinecone import Index as PineconeIndex
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: unstructured-ingest
3
- Version: 0.0.0
3
+ Version: 0.0.1
4
4
  Summary: A library that prepares raw documents for downstream ML tasks.
5
5
  Home-page: https://github.com/Unstructured-IO/unstructured-ingest
6
6
  Author: Unstructured Technologies
@@ -29,22 +29,22 @@ Requires-Dist: pyairtable ; extra == 'airtable'
29
29
  Provides-Extra: astra
30
30
  Requires-Dist: astrapy ; extra == 'astra'
31
31
  Provides-Extra: azure
32
- Requires-Dist: fsspec ; extra == 'azure'
33
32
  Requires-Dist: adlfs ; extra == 'azure'
33
+ Requires-Dist: fsspec ; extra == 'azure'
34
34
  Provides-Extra: azure-cognitive-search
35
35
  Requires-Dist: azure-search-documents ; extra == 'azure-cognitive-search'
36
36
  Provides-Extra: bedrock
37
- Requires-Dist: boto3 ; extra == 'bedrock'
38
37
  Requires-Dist: langchain-community ; extra == 'bedrock'
38
+ Requires-Dist: boto3 ; extra == 'bedrock'
39
39
  Provides-Extra: biomed
40
40
  Requires-Dist: bs4 ; extra == 'biomed'
41
41
  Provides-Extra: box
42
- Requires-Dist: boxfs ; extra == 'box'
43
42
  Requires-Dist: fsspec ; extra == 'box'
43
+ Requires-Dist: boxfs ; extra == 'box'
44
44
  Provides-Extra: chroma
45
- Requires-Dist: importlib-metadata >=7.1.0 ; extra == 'chroma'
46
45
  Requires-Dist: typer <=0.9.0 ; extra == 'chroma'
47
46
  Requires-Dist: chromadb ; extra == 'chroma'
47
+ Requires-Dist: importlib-metadata >=7.1.0 ; extra == 'chroma'
48
48
  Provides-Extra: clarifai
49
49
  Requires-Dist: clarifai ; extra == 'clarifai'
50
50
  Provides-Extra: confluence
@@ -72,21 +72,21 @@ Requires-Dist: huggingface ; extra == 'embed-huggingface'
72
72
  Requires-Dist: langchain-community ; extra == 'embed-huggingface'
73
73
  Requires-Dist: sentence-transformers ; extra == 'embed-huggingface'
74
74
  Provides-Extra: embed-octoai
75
- Requires-Dist: openai ; extra == 'embed-octoai'
76
75
  Requires-Dist: tiktoken ; extra == 'embed-octoai'
76
+ Requires-Dist: openai ; extra == 'embed-octoai'
77
77
  Provides-Extra: embed-vertexai
78
- Requires-Dist: langchain-google-vertexai ; extra == 'embed-vertexai'
79
78
  Requires-Dist: langchain-community ; extra == 'embed-vertexai'
80
79
  Requires-Dist: langchain ; extra == 'embed-vertexai'
80
+ Requires-Dist: langchain-google-vertexai ; extra == 'embed-vertexai'
81
81
  Provides-Extra: embed-voyageai
82
82
  Requires-Dist: langchain-voyageai ; extra == 'embed-voyageai'
83
83
  Requires-Dist: langchain ; extra == 'embed-voyageai'
84
84
  Provides-Extra: epub
85
85
  Requires-Dist: unstructured[epub] ; extra == 'epub'
86
86
  Provides-Extra: gcs
87
- Requires-Dist: fsspec ; extra == 'gcs'
88
87
  Requires-Dist: bs4 ; extra == 'gcs'
89
88
  Requires-Dist: gcsfs ; extra == 'gcs'
89
+ Requires-Dist: fsspec ; extra == 'gcs'
90
90
  Provides-Extra: github
91
91
  Requires-Dist: pygithub >1.58.0 ; extra == 'github'
92
92
  Provides-Extra: gitlab
@@ -94,38 +94,40 @@ Requires-Dist: python-gitlab ; extra == 'gitlab'
94
94
  Provides-Extra: google-drive
95
95
  Requires-Dist: google-api-python-client ; extra == 'google-drive'
96
96
  Provides-Extra: hubspot
97
- Requires-Dist: urllib3 ; extra == 'hubspot'
98
97
  Requires-Dist: hubspot-api-client ; extra == 'hubspot'
98
+ Requires-Dist: urllib3 ; extra == 'hubspot'
99
99
  Provides-Extra: jira
100
100
  Requires-Dist: atlassian-python-api ; extra == 'jira'
101
101
  Provides-Extra: kafka
102
102
  Requires-Dist: confluent-kafka ; extra == 'kafka'
103
103
  Provides-Extra: md
104
104
  Requires-Dist: unstructured[md] ; extra == 'md'
105
+ Provides-Extra: milvus
106
+ Requires-Dist: pymilvus ; extra == 'milvus'
105
107
  Provides-Extra: mongodb
106
108
  Requires-Dist: pymongo ; extra == 'mongodb'
107
109
  Provides-Extra: msg
108
110
  Requires-Dist: unstructured[msg] ; extra == 'msg'
109
111
  Provides-Extra: notion
110
- Requires-Dist: notion-client ; extra == 'notion'
111
112
  Requires-Dist: htmlBuilder ; extra == 'notion'
113
+ Requires-Dist: notion-client ; extra == 'notion'
112
114
  Provides-Extra: odt
113
115
  Requires-Dist: unstructured[odt] ; extra == 'odt'
114
116
  Provides-Extra: onedrive
115
- Requires-Dist: msal ; extra == 'onedrive'
116
117
  Requires-Dist: bs4 ; extra == 'onedrive'
117
118
  Requires-Dist: Office365-REST-Python-Client ; extra == 'onedrive'
119
+ Requires-Dist: msal ; extra == 'onedrive'
118
120
  Provides-Extra: openai
119
- Requires-Dist: openai ; extra == 'openai'
120
121
  Requires-Dist: tiktoken ; extra == 'openai'
121
122
  Requires-Dist: langchain-community ; extra == 'openai'
123
+ Requires-Dist: openai ; extra == 'openai'
122
124
  Provides-Extra: opensearch
123
125
  Requires-Dist: opensearch-py ; extra == 'opensearch'
124
126
  Provides-Extra: org
125
127
  Requires-Dist: unstructured[org] ; extra == 'org'
126
128
  Provides-Extra: outlook
127
- Requires-Dist: msal ; extra == 'outlook'
128
129
  Requires-Dist: Office365-REST-Python-Client ; extra == 'outlook'
130
+ Requires-Dist: msal ; extra == 'outlook'
129
131
  Provides-Extra: pdf
130
132
  Requires-Dist: unstructured[pdf] ; extra == 'pdf'
131
133
  Provides-Extra: pinecone
@@ -150,11 +152,11 @@ Requires-Dist: s3fs ; extra == 's3'
150
152
  Provides-Extra: salesforce
151
153
  Requires-Dist: simple-salesforce ; extra == 'salesforce'
152
154
  Provides-Extra: sftp
153
- Requires-Dist: fsspec ; extra == 'sftp'
154
155
  Requires-Dist: paramiko ; extra == 'sftp'
156
+ Requires-Dist: fsspec ; extra == 'sftp'
155
157
  Provides-Extra: sharepoint
156
- Requires-Dist: msal ; extra == 'sharepoint'
157
158
  Requires-Dist: Office365-REST-Python-Client ; extra == 'sharepoint'
159
+ Requires-Dist: msal ; extra == 'sharepoint'
158
160
  Provides-Extra: singlestore
159
161
  Requires-Dist: singlestoredb ; extra == 'singlestore'
160
162
  Provides-Extra: slack
@@ -1,9 +1,9 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=Q49HKCptFgT2OeWLy_cQ73sq2sMFreeYIt3GaSMpXf8,42
2
+ unstructured_ingest/__version__.py,sha256=SI019rW6paHw93e6fOWFzF9TruLom8o9HrgZsjGZvaE,42
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/evaluate.py,sha256=R-mKLFXbVX1xQ1tjGsLHjdP-TbSSV-925IHzggW_bIg,9793
5
5
  unstructured_ingest/interfaces.py,sha256=uS8L5mS0mXD8I4XTfVlKZxAwqnpJ4yrRqn4vxWVRhQI,31107
6
- unstructured_ingest/logger.py,sha256=P5KVgFSRN4uSSNmf5S00zr_TdlL7uAhjxn_26tcNWxI,4480
6
+ unstructured_ingest/logger.py,sha256=TrhyH7VbCWO5VVuhvL0yUyXxuem3b4pzbqj2uQHUwZk,4480
7
7
  unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
8
8
  unstructured_ingest/processor.py,sha256=XKKrvbxsb--5cDzz4hB3-GfWZYyIjJ2ah8FpzQKF_DM,2760
9
9
  unstructured_ingest/cli/__init__.py,sha256=9kNcBOHuXON5lB1MJU9QewEhwPmId56vXqB29-kqEAA,302
@@ -104,7 +104,7 @@ unstructured_ingest/connector/fsspec/sftp.py,sha256=x2w8JGM81S_HXww7Aa-bTY1LjZSi
104
104
  unstructured_ingest/connector/notion/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
105
105
  unstructured_ingest/connector/notion/client.py,sha256=vU1GE64ktEAM4b-jo8UnMAwz60KSiQ6iRI3De3ixNdI,8689
106
106
  unstructured_ingest/connector/notion/connector.py,sha256=E-t7q5XAiYP9xk-1aqIqcGwdJOH8UNgiE0HcH9Oc4i4,17475
107
- unstructured_ingest/connector/notion/helpers.py,sha256=5SbQbNxIenMHyxEAMfrsVsXpNcAKPHo3gwWQVi1NUOc,20702
107
+ unstructured_ingest/connector/notion/helpers.py,sha256=jqg5-cPcrjm5G7dBF4jOjHxdJN1CI7yBeTefiX4hBoM,20702
108
108
  unstructured_ingest/connector/notion/interfaces.py,sha256=SrTT-9c0nvk0fMqVgudYF647r04AdMKi6wkIkMy7Szw,563
109
109
  unstructured_ingest/connector/notion/types/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
110
110
  unstructured_ingest/connector/notion/types/block.py,sha256=AKOY-o6CTFC-caWlkLfKskMuFemH4-Vdrhv7HnRkS8w,3009
@@ -253,7 +253,7 @@ unstructured_ingest/utils/string_and_date_utils.py,sha256=hnGglD8Z626vLhH_UV4Qyb
253
253
  unstructured_ingest/utils/table.py,sha256=aWjcowDVSClNpEAdR6PY3H7khKu4T6T3QqQE6GjmQ_M,3469
254
254
  unstructured_ingest/v2/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
255
255
  unstructured_ingest/v2/example.py,sha256=qkwmpMxUlaJXdDNKQ4LlUt3XGxgTUU3CXGGO57eW5Gs,1644
256
- unstructured_ingest/v2/logger.py,sha256=tI_PtlibnJmv1MMajHT5GSZhQ77dv30UADEWaXWgynA,4324
256
+ unstructured_ingest/v2/logger.py,sha256=akcghdHwpKM3CfoeFzir0zmc7R9Hk7zjquU-X-gwUIw,4324
257
257
  unstructured_ingest/v2/main.py,sha256=WFdLEqEXRy6E9_G-dF20MK2AtgX51Aan1sp_N67U2B8,172
258
258
  unstructured_ingest/v2/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
259
259
  unstructured_ingest/v2/cli/cli.py,sha256=qHXIs-PcvMgDZhP1AR9iDMxh8FXBMJCEDksPBfiMULE,648
@@ -264,7 +264,7 @@ unstructured_ingest/v2/cli/base/cmd.py,sha256=qVHmquVsVDoYyPByKdUTVCwAFfILMYBw5w
264
264
  unstructured_ingest/v2/cli/base/dest.py,sha256=YMbVIHmYDqvOtxZeEY93stmF2p2ImjuJts7-u-NznYw,2887
265
265
  unstructured_ingest/v2/cli/base/importer.py,sha256=nRt0QQ3qpi264-n_mR0l55C2ddM8nowTNzT1jsWaam8,1128
266
266
  unstructured_ingest/v2/cli/base/src.py,sha256=7LnZh9FgUX9rerBH6cizVtTWmM6R2sRkxatnGsxYHG0,2410
267
- unstructured_ingest/v2/cli/cmds/__init__.py,sha256=aOcJb2FLQaUOU-vdu4xHr5_BJQme6ADlPaRjCSHL1Io,2590
267
+ unstructured_ingest/v2/cli/cmds/__init__.py,sha256=DWPMD6Wqus22sSoIEyTSiOJAm97aNjvdpdrXgsL4uQ0,2647
268
268
  unstructured_ingest/v2/cli/cmds/astra.py,sha256=L-GR2KSP_cFQkQm0aVcdiXmgYMJZCVKIAH794y8qT1M,2590
269
269
  unstructured_ingest/v2/cli/cmds/azure_cognitive_search.py,sha256=VTCSUYeIYKnP60lC7DeBYqoqAJnWuBZrwevCXbeIEzw,2248
270
270
  unstructured_ingest/v2/cli/cmds/chroma.py,sha256=RinNOPripk2zRYx1Rt-u-jywXbwh7JsidVia4F0-wyU,3359
@@ -272,6 +272,7 @@ unstructured_ingest/v2/cli/cmds/databricks_volumes.py,sha256=53d9A7UunJLYZFwwwHE
272
272
  unstructured_ingest/v2/cli/cmds/elasticsearch.py,sha256=joUfnV992fAwEDCtFVJaABwgpyQiWeDl1ZCBEudRtnk,5258
273
273
  unstructured_ingest/v2/cli/cmds/google_drive.py,sha256=mXozabpi8kjRFb0S7kw-xMGtEuFoVUxnvefwL5ZIPHc,2334
274
274
  unstructured_ingest/v2/cli/cmds/local.py,sha256=lGBFOVDRlrcCtPFjyk0IAYHLRWg95Kunu1Kli7t0ZK4,1899
275
+ unstructured_ingest/v2/cli/cmds/milvus.py,sha256=PB1ib1rFGGH_-KDi1bSIO3BIiVcqSJEHCBFFrzQrnmI,1998
275
276
  unstructured_ingest/v2/cli/cmds/mongodb.py,sha256=oyV6tacuuxm3dN-AXQgbxvYJiDYo2OOWQKRSBCUGj0E,1823
276
277
  unstructured_ingest/v2/cli/cmds/onedrive.py,sha256=DKqhQyyF-swZxs3C9G5W8ECleq8sWpDbpTuiAHXukXQ,2781
277
278
  unstructured_ingest/v2/cli/cmds/opensearch.py,sha256=7zl8dUXzxs24MDRRASKfNc14IDM798qOXRl2FZdXG1I,3064
@@ -304,7 +305,7 @@ unstructured_ingest/v2/interfaces/processor.py,sha256=uHVHeKo5Gt_zFkaEXw7xgaCBDT
304
305
  unstructured_ingest/v2/interfaces/upload_stager.py,sha256=SylhDl9pK6qa7hvfrhpabCkjwE03yIlI6oM-mQnqtho,1220
305
306
  unstructured_ingest/v2/interfaces/uploader.py,sha256=bzfx3Ei4poXKu-hsgjAB4sj4jKij9CoaRSadUM5LtGk,1083
306
307
  unstructured_ingest/v2/pipeline/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
307
- unstructured_ingest/v2/pipeline/interfaces.py,sha256=3zRcu6sc-2rnm3gALOzXA9mI39m2RUPrREZGTd9x77c,6352
308
+ unstructured_ingest/v2/pipeline/interfaces.py,sha256=Zz76fLHNKw6BDsBSYQXiRa6CvyW91ulvZU0yw5vVQ5M,6544
308
309
  unstructured_ingest/v2/pipeline/pipeline.py,sha256=r8jRMZI2RF8GQIuTcjIFBDeFtMnqpOJmKhEriy6Vo5Y,11616
309
310
  unstructured_ingest/v2/pipeline/utils.py,sha256=oPAitfdnITqh2O8Z0uf6VOHg9BTJhitRzNmKXqTwPxg,422
310
311
  unstructured_ingest/v2/pipeline/steps/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -322,7 +323,7 @@ unstructured_ingest/v2/processes/connector_registry.py,sha256=KOrvJNNRdpBPyqFwmT
322
323
  unstructured_ingest/v2/processes/embedder.py,sha256=QjAsiXAjWtZzh6lJ4D5LsTMBD81zuMBkegXNWq-FZt0,3308
323
324
  unstructured_ingest/v2/processes/partitioner.py,sha256=f6UQoQHVKjl8rmM5J9EcuP30RTFLSLrArGdC6qh-ffE,7645
324
325
  unstructured_ingest/v2/processes/uncompress.py,sha256=x-JZYNs1zJOtRS7xNgiMyrYoAbzKM0p18O8NAl7avCA,1631
325
- unstructured_ingest/v2/processes/connectors/__init__.py,sha256=cuPXXs7__tztof9Z8jE4b7UPQFc6p616hH1BECzBK1M,3762
326
+ unstructured_ingest/v2/processes/connectors/__init__.py,sha256=7QMKd8gtEJTIuK352Ho6XyoFvLLhrWIzgdu0dXwXWOE,3960
326
327
  unstructured_ingest/v2/processes/connectors/astra.py,sha256=TSI_3GHnEh3gYAC30RTG4b2eEB07agroEFmJ38GnQY4,4903
327
328
  unstructured_ingest/v2/processes/connectors/azure_cognitive_search.py,sha256=PT02ZKiJuHMrmBClxqBsyDS0aXUQYLVg02Ns2qh1hD4,7935
328
329
  unstructured_ingest/v2/processes/connectors/chroma.py,sha256=nYzNz-8oq-DN0Z4r7lHQFmlved76IaYeRvm7-EmbGUE,6998
@@ -330,10 +331,11 @@ unstructured_ingest/v2/processes/connectors/databricks_volumes.py,sha256=MTLK7Sv
330
331
  unstructured_ingest/v2/processes/connectors/elasticsearch.py,sha256=6QBvVzPk3mWj9ZqJZN7NvhcJaOO6nSLqLwU6zggP59A,14864
331
332
  unstructured_ingest/v2/processes/connectors/google_drive.py,sha256=IkLVafUu280OOoqYmdfdfMB6zlpiWjs2Z5J31ZzJOj4,12681
332
333
  unstructured_ingest/v2/processes/connectors/local.py,sha256=maAXVKpRRXj_jseC6EPLTosMgw6ll-0lnGsDdAFLWAE,6646
334
+ unstructured_ingest/v2/processes/connectors/milvus.py,sha256=FWH4FH-zns7gh8sITg9pLYE9uKm_3GeOXJ4wjY6PMno,6776
333
335
  unstructured_ingest/v2/processes/connectors/mongodb.py,sha256=ErZWAD-su3OCRGv1h84X1PpAWleUPVZcFDEIYjtyP4E,4310
334
336
  unstructured_ingest/v2/processes/connectors/onedrive.py,sha256=WDDoFEfd8M_QBTpkGNI2zZGZZ_CR1rQiCsBWYOO2JoA,8311
335
337
  unstructured_ingest/v2/processes/connectors/opensearch.py,sha256=HNRZVQsWnjLLm0yAGiIyHRbhAsBnGSXBO_VkUfIdwdE,5463
336
- unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=VkPYmGmFKbgsmmrWV09roxztAv5LlTBVHizPeyPoFVc,5746
338
+ unstructured_ingest/v2/processes/connectors/pinecone.py,sha256=PtAodxemYgiBZESx-g9a8fcL6XagJd9DIDQjrhE8aPk,5746
337
339
  unstructured_ingest/v2/processes/connectors/salesforce.py,sha256=Cz4qEtnbsD9-m1DXANxnVRZTHX2ZaUUBPVFPu5wnFRk,10832
338
340
  unstructured_ingest/v2/processes/connectors/sharepoint.py,sha256=SNovgGUE5tHdfX_lF5zwM_QRZK7mahHzLZKhnqfk6Tc,17696
339
341
  unstructured_ingest/v2/processes/connectors/singlestore.py,sha256=upF2O4hJ2uiBhDRrpQ8CSJUvzmqu2j5H1b_QbReHJpw,5168
@@ -349,8 +351,8 @@ unstructured_ingest/v2/processes/connectors/fsspec/gcs.py,sha256=RYZq_8hKF7bRxuB
349
351
  unstructured_ingest/v2/processes/connectors/fsspec/s3.py,sha256=7lOm5hjb0LBkbe-OWXnV3wDC-3mM_GWwwmdKW0xzh8c,5333
350
352
  unstructured_ingest/v2/processes/connectors/fsspec/sftp.py,sha256=J7Ej-j7dtXAluHunwynUfHlNsYwymb-LsrGUFcljcsA,5700
351
353
  unstructured_ingest/v2/processes/connectors/fsspec/utils.py,sha256=jec_Qfe2hbfahBuY-u8FnvHuv933AI5HwPFjOL3kEEY,456
352
- unstructured_ingest-0.0.0.dist-info/METADATA,sha256=XkFXINFQaqCmpnfNfymilYFVadQCXgkbXUm236ko_so,21501
353
- unstructured_ingest-0.0.0.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
354
- unstructured_ingest-0.0.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
355
- unstructured_ingest-0.0.0.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
356
- unstructured_ingest-0.0.0.dist-info/RECORD,,
354
+ unstructured_ingest-0.0.1.dist-info/METADATA,sha256=Qru27Cxrf0C-vFe7MqfaKOfavazrWYTTRif6loKf71o,21568
355
+ unstructured_ingest-0.0.1.dist-info/WHEEL,sha256=GJ7t_kWBFywbagK5eo9IoUwLW6oyOeTKmQ-9iHFVNxQ,92
356
+ unstructured_ingest-0.0.1.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
357
+ unstructured_ingest-0.0.1.dist-info/top_level.txt,sha256=QaTxTcjfM5Hr9sZJ6weOJvSe5ESQc0F8AWkhHInTCf8,20
358
+ unstructured_ingest-0.0.1.dist-info/RECORD,,