unstructured-ingest 1.0.18__py3-none-any.whl → 1.0.21__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql +1 -2
- unstructured_ingest/processes/connectors/databricks/volumes_table.py +2 -2
- unstructured_ingest/processes/connectors/jira.py +22 -2
- unstructured_ingest/processes/connectors/weaviate/weaviate.py +30 -13
- {unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/METADATA +1 -1
- {unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/RECORD +10 -10
- {unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.0.18" # pragma: no cover
|
|
1
|
+
__version__ = "1.0.21" # pragma: no cover
|
|
@@ -136,7 +136,7 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
|
|
|
136
136
|
def get_table_columns(self) -> dict[str, str]:
|
|
137
137
|
if self._columns is None:
|
|
138
138
|
with self.get_cursor() as cursor:
|
|
139
|
-
cursor.execute(f"SELECT * from {self.upload_config.table_name} LIMIT 1")
|
|
139
|
+
cursor.execute(f"SELECT * from `{self.upload_config.table_name}` LIMIT 1")
|
|
140
140
|
self._columns = {desc[0]: desc[1] for desc in cursor.description}
|
|
141
141
|
return self._columns
|
|
142
142
|
|
|
@@ -152,7 +152,7 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
|
|
|
152
152
|
)
|
|
153
153
|
with self.get_cursor() as cursor:
|
|
154
154
|
cursor.execute(
|
|
155
|
-
f"DELETE FROM {self.upload_config.table_name} WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'" # noqa: E501
|
|
155
|
+
f"DELETE FROM `{self.upload_config.table_name}` WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'" # noqa: E501
|
|
156
156
|
)
|
|
157
157
|
results = cursor.fetchall()
|
|
158
158
|
deleted_rows = results[0][0]
|
|
@@ -3,7 +3,7 @@ from collections import abc
|
|
|
3
3
|
from contextlib import contextmanager
|
|
4
4
|
from dataclasses import dataclass, field
|
|
5
5
|
from pathlib import Path
|
|
6
|
-
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union
|
|
6
|
+
from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
|
|
7
7
|
|
|
8
8
|
from pydantic import Field, Secret
|
|
9
9
|
|
|
@@ -169,8 +169,28 @@ class JiraConnectionConfig(ConnectionConfig):
|
|
|
169
169
|
def get_client(self) -> Generator["Jira", None, None]:
|
|
170
170
|
from atlassian import Jira
|
|
171
171
|
|
|
172
|
+
class CustomJira(Jira):
|
|
173
|
+
"""
|
|
174
|
+
Custom Jira class to fix the issue with the get_project_issues_count method.
|
|
175
|
+
This class inherits from the original Jira class and overrides the method to
|
|
176
|
+
handle the response correctly.
|
|
177
|
+
Once the issue is fixed in the original library, this class can be removed.
|
|
178
|
+
"""
|
|
179
|
+
|
|
180
|
+
def __init__(self, *args, **kwargs):
|
|
181
|
+
super().__init__(*args, **kwargs)
|
|
182
|
+
|
|
183
|
+
def get_project_issues_count(self, project: str) -> int:
|
|
184
|
+
jql = f'project = "{project}" '
|
|
185
|
+
response = self.jql(jql, fields="*none")
|
|
186
|
+
response = cast("dict", response)
|
|
187
|
+
if "total" in response:
|
|
188
|
+
return response["total"]
|
|
189
|
+
else:
|
|
190
|
+
return len(response["issues"])
|
|
191
|
+
|
|
172
192
|
access_configs = self.access_config.get_secret_value()
|
|
173
|
-
with Jira(
|
|
193
|
+
with CustomJira(
|
|
174
194
|
url=self.url,
|
|
175
195
|
username=self.username,
|
|
176
196
|
password=access_configs.password,
|
|
@@ -234,15 +234,32 @@ class WeaviateUploader(VectorDBUploader, ABC):
|
|
|
234
234
|
self.create_destination(**kwargs)
|
|
235
235
|
|
|
236
236
|
def format_destination_name(self, destination_name: str) -> str:
|
|
237
|
-
|
|
238
|
-
|
|
237
|
+
"""
|
|
238
|
+
Weaviate Collection naming conventions:
|
|
239
|
+
1. must begin with an uppercase letter
|
|
240
|
+
2. must be alphanumeric and underscores only
|
|
241
|
+
"""
|
|
242
|
+
|
|
243
|
+
# Check that the first character is a letter (it is upper-cased below)
|
|
244
|
+
if not re.match(r"^[a-zA-Z]", destination_name):
|
|
245
|
+
raise ValueError("Collection name must start with an uppercase letter")
|
|
246
|
+
# Replace all non-alphanumeric characters with underscores
|
|
239
247
|
formatted = re.sub(r"[^a-zA-Z0-9]", "_", destination_name)
|
|
240
|
-
#
|
|
241
|
-
|
|
248
|
+
# Make the first character uppercase and leave the rest as is
|
|
249
|
+
if len(formatted) == 1:
|
|
250
|
+
formatted = formatted.capitalize()
|
|
251
|
+
else:
|
|
252
|
+
formatted = formatted[0].capitalize() + formatted[1:]
|
|
253
|
+
if formatted != destination_name:
|
|
254
|
+
logger.warning(
|
|
255
|
+
f"Given Collection name '{destination_name}' doesn't follow naming conventions. "
|
|
256
|
+
f"Renaming to '{formatted}'"
|
|
257
|
+
)
|
|
258
|
+
return formatted
|
|
242
259
|
|
|
243
260
|
def create_destination(
|
|
244
261
|
self,
|
|
245
|
-
destination_name: str = "
|
|
262
|
+
destination_name: str = "Unstructuredautocreated",
|
|
246
263
|
vector_length: Optional[int] = None,
|
|
247
264
|
**kwargs: Any,
|
|
248
265
|
) -> bool:
|
|
@@ -250,18 +267,18 @@ class WeaviateUploader(VectorDBUploader, ABC):
|
|
|
250
267
|
collection_name = self.format_destination_name(collection_name)
|
|
251
268
|
self.upload_config.collection = collection_name
|
|
252
269
|
|
|
253
|
-
connectors_dir = Path(__file__).parents[1]
|
|
254
|
-
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
255
|
-
with collection_config_file.open() as f:
|
|
256
|
-
collection_config = json.load(f)
|
|
257
|
-
collection_config["class"] = collection_name
|
|
258
|
-
|
|
259
270
|
if not self._collection_exists():
|
|
260
|
-
|
|
271
|
+
connectors_dir = Path(__file__).parents[1]
|
|
272
|
+
collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
|
|
273
|
+
with collection_config_file.open() as f:
|
|
274
|
+
collection_config = json.load(f)
|
|
275
|
+
collection_config["class"] = collection_name
|
|
276
|
+
|
|
277
|
+
logger.info(f"Creating weaviate collection '{collection_name}' with default configs")
|
|
261
278
|
with self.connection_config.get_client() as weaviate_client:
|
|
262
279
|
weaviate_client.collections.create_from_dict(config=collection_config)
|
|
263
280
|
return True
|
|
264
|
-
logger.debug(f"
|
|
281
|
+
logger.debug(f"Collection with name '{collection_name}' already exists, skipping creation")
|
|
265
282
|
return False
|
|
266
283
|
|
|
267
284
|
def check_for_errors(self, client: "WeaviateClient") -> None:
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=_fAo4tbdJV7k_s1lgXUPPmLFVpxbTy7HhoN9KbPxQ4Y,43
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
|
|
|
73
73
|
unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
|
|
74
74
|
unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
|
|
75
75
|
unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
|
|
76
|
-
unstructured_ingest/processes/connectors/jira.py,sha256=
|
|
76
|
+
unstructured_ingest/processes/connectors/jira.py,sha256=alnwUYyID-mUIlGq1xh5QGEw2iZ2RwbOIyptev3dI6Q,18011
|
|
77
77
|
unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
|
|
78
78
|
unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
|
|
79
79
|
unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
|
|
@@ -89,7 +89,7 @@ unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDA
|
|
|
89
89
|
unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
|
|
90
90
|
unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
|
|
91
91
|
unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
92
|
-
unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=
|
|
92
|
+
unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=8a9HTcRWA6IuswSD632b_uZSO6Dax_0rUYnflqktcek,226
|
|
93
93
|
unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
|
|
94
94
|
unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
|
|
95
95
|
unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
|
|
@@ -97,7 +97,7 @@ unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6
|
|
|
97
97
|
unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
|
|
98
98
|
unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
|
|
99
99
|
unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
|
|
100
|
-
unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=
|
|
100
|
+
unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=K-EBsV99I9ubD3A0cqAJTC4vpSwrnBeACFGWbgGCSsY,8198
|
|
101
101
|
unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
|
|
102
102
|
unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
|
|
103
103
|
unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
|
|
@@ -214,7 +214,7 @@ unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3
|
|
|
214
214
|
unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
|
|
215
215
|
unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
|
|
216
216
|
unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaSNqVlKROm-S3Ql3naLmKvigLBgUQdw,2195
|
|
217
|
-
unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=
|
|
217
|
+
unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=yB67gxvo3X0UaP_mNeB0HbSWXst7ur0E2QKwLA0gIS4,13647
|
|
218
218
|
unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
219
219
|
unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
|
|
220
220
|
unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
|
|
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
|
|
|
231
231
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
232
232
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
233
233
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
234
|
-
unstructured_ingest-1.0.
|
|
235
|
-
unstructured_ingest-1.0.
|
|
236
|
-
unstructured_ingest-1.0.
|
|
237
|
-
unstructured_ingest-1.0.
|
|
238
|
-
unstructured_ingest-1.0.
|
|
234
|
+
unstructured_ingest-1.0.21.dist-info/METADATA,sha256=lYMmxWJ0ySauI_NWrAQo4YZQ7pXAK4bZ0dX0XIsgacE,8694
|
|
235
|
+
unstructured_ingest-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
236
|
+
unstructured_ingest-1.0.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
237
|
+
unstructured_ingest-1.0.21.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
238
|
+
unstructured_ingest-1.0.21.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.0.18.dist-info → unstructured_ingest-1.0.21.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|