unstructured-ingest 1.0.18__py3-none-any.whl → 1.0.21__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. More details are available through the link on the original page.

@@ -1 +1 @@
1
- __version__ = "1.0.18" # pragma: no cover
1
+ __version__ = "1.0.21" # pragma: no cover
@@ -1,4 +1,4 @@
1
- CREATE TABLE elements (
1
+ CREATE TABLE IF NOT EXISTS `elements` (
2
2
  id STRING NOT NULL PRIMARY KEY,
3
3
  record_id STRING NOT NULL,
4
4
  element_id STRING NOT NULL,
@@ -7,4 +7,3 @@ CREATE TABLE elements (
7
7
  type STRING,
8
8
  metadata VARIANT
9
9
  );
10
-
@@ -136,7 +136,7 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
136
136
  def get_table_columns(self) -> dict[str, str]:
137
137
  if self._columns is None:
138
138
  with self.get_cursor() as cursor:
139
- cursor.execute(f"SELECT * from {self.upload_config.table_name} LIMIT 1")
139
+ cursor.execute(f"SELECT * from `{self.upload_config.table_name}` LIMIT 1")
140
140
  self._columns = {desc[0]: desc[1] for desc in cursor.description}
141
141
  return self._columns
142
142
 
@@ -152,7 +152,7 @@ class DatabricksVolumeDeltaTableUploader(Uploader):
152
152
  )
153
153
  with self.get_cursor() as cursor:
154
154
  cursor.execute(
155
- f"DELETE FROM {self.upload_config.table_name} WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'" # noqa: E501
155
+ f"DELETE FROM `{self.upload_config.table_name}` WHERE {RECORD_ID_LABEL} = '{file_data.identifier}'" # noqa: E501
156
156
  )
157
157
  results = cursor.fetchall()
158
158
  deleted_rows = results[0][0]
@@ -3,7 +3,7 @@ from collections import abc
3
3
  from contextlib import contextmanager
4
4
  from dataclasses import dataclass, field
5
5
  from pathlib import Path
6
- from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union
6
+ from typing import TYPE_CHECKING, Any, Dict, Generator, List, Optional, Union, cast
7
7
 
8
8
  from pydantic import Field, Secret
9
9
 
@@ -169,8 +169,28 @@ class JiraConnectionConfig(ConnectionConfig):
169
169
  def get_client(self) -> Generator["Jira", None, None]:
170
170
  from atlassian import Jira
171
171
 
172
+ class CustomJira(Jira):
173
+ """
174
+ Custom Jira class to fix the issue with the get_project_issues_count method.
175
+ This class inherits from the original Jira class and overrides the method to
176
+ handle the response correctly.
177
+ Once the issue is fixed in the original library, this class can be removed.
178
+ """
179
+
180
+ def __init__(self, *args, **kwargs):
181
+ super().__init__(*args, **kwargs)
182
+
183
+ def get_project_issues_count(self, project: str) -> int:
184
+ jql = f'project = "{project}" '
185
+ response = self.jql(jql, fields="*none")
186
+ response = cast("dict", response)
187
+ if "total" in response:
188
+ return response["total"]
189
+ else:
190
+ return len(response["issues"])
191
+
172
192
  access_configs = self.access_config.get_secret_value()
173
- with Jira(
193
+ with CustomJira(
174
194
  url=self.url,
175
195
  username=self.username,
176
196
  password=access_configs.password,
@@ -234,15 +234,32 @@ class WeaviateUploader(VectorDBUploader, ABC):
234
234
  self.create_destination(**kwargs)
235
235
 
236
236
  def format_destination_name(self, destination_name: str) -> str:
237
- # Weaviate naming requirements:
238
- # must be alphanumeric and underscores only
237
+ """
238
+ Weaviate Collection naming conventions:
239
+ 1. must begin with an uppercase letter
240
+ 2. must be alphanumeric and underscores only
241
+ """
242
+
243
+ # Check if the first character is an uppercase letter
244
+ if not re.match(r"^[a-zA-Z]", destination_name):
245
+ raise ValueError("Collection name must start with an uppercase letter")
246
+ # Replace all non-alphanumeric characters with underscores
239
247
  formatted = re.sub(r"[^a-zA-Z0-9]", "_", destination_name)
240
- # must begin with capital letter
241
- return formatted.capitalize()
248
+ # Make the first character uppercase and leave the rest as is
249
+ if len(formatted) == 1:
250
+ formatted = formatted.capitalize()
251
+ else:
252
+ formatted = formatted[0].capitalize() + formatted[1:]
253
+ if formatted != destination_name:
254
+ logger.warning(
255
+ f"Given Collection name '{destination_name}' doesn't follow naming conventions. "
256
+ f"Renaming to '{formatted}'"
257
+ )
258
+ return formatted
242
259
 
243
260
  def create_destination(
244
261
  self,
245
- destination_name: str = "unstructuredautocreated",
262
+ destination_name: str = "Unstructuredautocreated",
246
263
  vector_length: Optional[int] = None,
247
264
  **kwargs: Any,
248
265
  ) -> bool:
@@ -250,18 +267,18 @@ class WeaviateUploader(VectorDBUploader, ABC):
250
267
  collection_name = self.format_destination_name(collection_name)
251
268
  self.upload_config.collection = collection_name
252
269
 
253
- connectors_dir = Path(__file__).parents[1]
254
- collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
255
- with collection_config_file.open() as f:
256
- collection_config = json.load(f)
257
- collection_config["class"] = collection_name
258
-
259
270
  if not self._collection_exists():
260
- logger.info(f"creating weaviate collection '{collection_name}' with default configs")
271
+ connectors_dir = Path(__file__).parents[1]
272
+ collection_config_file = connectors_dir / "assets" / "weaviate_collection_config.json"
273
+ with collection_config_file.open() as f:
274
+ collection_config = json.load(f)
275
+ collection_config["class"] = collection_name
276
+
277
+ logger.info(f"Creating weaviate collection '{collection_name}' with default configs")
261
278
  with self.connection_config.get_client() as weaviate_client:
262
279
  weaviate_client.collections.create_from_dict(config=collection_config)
263
280
  return True
264
- logger.debug(f"collection with name '{collection_name}' already exists, skipping creation")
281
+ logger.debug(f"Collection with name '{collection_name}' already exists, skipping creation")
265
282
  return False
266
283
 
267
284
  def check_for_errors(self, client: "WeaviateClient") -> None:
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: unstructured_ingest
3
- Version: 1.0.18
3
+ Version: 1.0.21
4
4
  Summary: Local ETL data pipeline to get data RAG ready
5
5
  Author-email: Unstructured Technologies <devops@unstructuredai.io>
6
6
  License-Expression: Apache-2.0
@@ -1,5 +1,5 @@
1
1
  unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
2
- unstructured_ingest/__version__.py,sha256=_WdLW6DLLv5QXi1-R0iwdBbseUYvOyWeY5pyAXABOCY,43
2
+ unstructured_ingest/__version__.py,sha256=_fAo4tbdJV7k_s1lgXUPPmLFVpxbTy7HhoN9KbPxQ4Y,43
3
3
  unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
4
4
  unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
5
5
  unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
@@ -73,7 +73,7 @@ unstructured_ingest/processes/connectors/discord.py,sha256=6yEJ_agfKUqsV43wFsbMk
73
73
  unstructured_ingest/processes/connectors/github.py,sha256=smHCz6jOH1p_hW2S25bYunBBj_pYjz8HTw6wkzaJz_A,7765
74
74
  unstructured_ingest/processes/connectors/gitlab.py,sha256=6h1CdqznJmzeWxGfXrFLdNdT23PExGnUMMX7usK_4Kk,10013
75
75
  unstructured_ingest/processes/connectors/google_drive.py,sha256=BIFBZGp26JlBBOcXy5Gq0UoNzWv6pwRKhEAHMVMI2_M,25050
76
- unstructured_ingest/processes/connectors/jira.py,sha256=eG8yTn8ZVEz7rBJ-ha8i_d9hEh6VALN6QJT_vbYvbL0,17142
76
+ unstructured_ingest/processes/connectors/jira.py,sha256=alnwUYyID-mUIlGq1xh5QGEw2iZ2RwbOIyptev3dI6Q,18011
77
77
  unstructured_ingest/processes/connectors/kdbai.py,sha256=XhxYpKSAoFPBsDQWwNuLX03DCxOVr7yquj9VYM55Rtc,5174
78
78
  unstructured_ingest/processes/connectors/local.py,sha256=LluTLKv4g7FbJb4A6vuSxI9VhzKZuuQUpDS-cVNAQ2g,7426
79
79
  unstructured_ingest/processes/connectors/milvus.py,sha256=Jr9cul7By03tGAPFnFBoqncnNWwbhKd-qbmkuqnin8U,8908
@@ -89,7 +89,7 @@ unstructured_ingest/processes/connectors/slack.py,sha256=EkFj9PcAu5_gF2xLogikKDA
89
89
  unstructured_ingest/processes/connectors/utils.py,sha256=TAd0hb1f291N-q7-TUe6JKSCGkhqDyo7Ij8zmliBZUc,2071
90
90
  unstructured_ingest/processes/connectors/vectara.py,sha256=xrC6jkgW8BII4UjdzUelDu122xT484cpfMTK2wl-sko,12292
91
91
  unstructured_ingest/processes/connectors/assets/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
92
- unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=dUZZDNkyvQXKqoAThRz3ek7zaUE2l_LAQimlG5WZhH4,211
92
+ unstructured_ingest/processes/connectors/assets/databricks_delta_table_schema.sql,sha256=8a9HTcRWA6IuswSD632b_uZSO6Dax_0rUYnflqktcek,226
93
93
  unstructured_ingest/processes/connectors/assets/weaviate_collection_config.json,sha256=SJlIO0kXxy866tWQ8bEzvwLwflsoUMIS-OKlxMvHIuE,504
94
94
  unstructured_ingest/processes/connectors/databricks/__init__.py,sha256=RtKAPyNtXh6fzEsOQ08pA0-vC1uMr3KqYG6cqiBoo70,2133
95
95
  unstructured_ingest/processes/connectors/databricks/volumes.py,sha256=OWQrne9-5hPzc-kxGa2P53M3DoksDzMDyjLhQyihdCo,8020
@@ -97,7 +97,7 @@ unstructured_ingest/processes/connectors/databricks/volumes_aws.py,sha256=WhGTp6
97
97
  unstructured_ingest/processes/connectors/databricks/volumes_azure.py,sha256=pF2d6uAIbwJJUeOIG5xknUMCGc5d9Aztmc2776wp-a0,3740
98
98
  unstructured_ingest/processes/connectors/databricks/volumes_gcp.py,sha256=y9AvVl6PtnIxlTlrPj_wyHBDBRJNq3uoTOuZwTryNg8,2994
99
99
  unstructured_ingest/processes/connectors/databricks/volumes_native.py,sha256=pivySGMmFSsyuB42ARAWAPXFQ7qTQxO3dfEoE23pBNM,3104
100
- unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=tqi6PpYpIBMTZcYZXl5Lw0YuawyDvjHI08TKPFFTTr0,8194
100
+ unstructured_ingest/processes/connectors/databricks/volumes_table.py,sha256=K-EBsV99I9ubD3A0cqAJTC4vpSwrnBeACFGWbgGCSsY,8198
101
101
  unstructured_ingest/processes/connectors/duckdb/__init__.py,sha256=Dr6BRJJGefJnnp_vn5W5gBd7vrCCXTMLweuDIqTP-fM,558
102
102
  unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpeeNNl7wxy3xkOt23O9XpCyD0WVY4,2945
103
103
  unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
@@ -214,7 +214,7 @@ unstructured_ingest/processes/connectors/weaviate/__init__.py,sha256=1Vnz8hm_Cf3
214
214
  unstructured_ingest/processes/connectors/weaviate/cloud.py,sha256=tDQ4Vfph1RwADzS0Lk4TSoeT6TZ2gX9DNi78yXkgDw0,6245
215
215
  unstructured_ingest/processes/connectors/weaviate/embedded.py,sha256=buizqBd6PSbd9VgRrOj43GZEorBpDFkUIkE6sN9emhw,3008
216
216
  unstructured_ingest/processes/connectors/weaviate/local.py,sha256=4fgZsL9dgnWuaSNqVlKROm-S3Ql3naLmKvigLBgUQdw,2195
217
- unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=SqtGcQgejGH0N1R49tGrUtGcTB8mt7sywXmWFTIcpB8,12866
217
+ unstructured_ingest/processes/connectors/weaviate/weaviate.py,sha256=yB67gxvo3X0UaP_mNeB0HbSWXst7ur0E2QKwLA0gIS4,13647
218
218
  unstructured_ingest/processes/connectors/zendesk/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
219
219
  unstructured_ingest/processes/connectors/zendesk/client.py,sha256=GvPIpx4aYdD58-edHgvCFjFao94uR0O5Yf4dT9NCmSk,11952
220
220
  unstructured_ingest/processes/connectors/zendesk/zendesk.py,sha256=j5zS_7vJmYDEQtysz_UfwIUH65gc4r-Zjc1LocJr9FM,9033
@@ -231,8 +231,8 @@ unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01q
231
231
  unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
232
232
  unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
233
233
  unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
234
- unstructured_ingest-1.0.18.dist-info/METADATA,sha256=Ab6dhItl8CiP5OYQContbtpnfBpz77OsIecAyjgb_DA,8694
235
- unstructured_ingest-1.0.18.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
- unstructured_ingest-1.0.18.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
- unstructured_ingest-1.0.18.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
- unstructured_ingest-1.0.18.dist-info/RECORD,,
234
+ unstructured_ingest-1.0.21.dist-info/METADATA,sha256=lYMmxWJ0ySauI_NWrAQo4YZQ7pXAK4bZ0dX0XIsgacE,8694
235
+ unstructured_ingest-1.0.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
236
+ unstructured_ingest-1.0.21.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
237
+ unstructured_ingest-1.0.21.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
238
+ unstructured_ingest-1.0.21.dist-info/RECORD,,