unstructured-ingest 1.1.3__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/otel.py +10 -8
- unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py +1 -1
- unstructured_ingest/processes/connectors/fsspec/fsspec.py +1 -1
- unstructured_ingest/utils/filesystem.py +4 -4
- {unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/METADATA +2 -3
- {unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/RECORD +10 -10
- {unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/WHEEL +0 -0
- {unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/licenses/LICENSE.md +0 -0
|
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "1.1.3" # pragma: no cover
|
|
1
|
+
__version__ = "1.2.0" # pragma: no cover
|
unstructured_ingest/otel.py
CHANGED
|
@@ -32,21 +32,23 @@ class LogSpanExporter(ConsoleSpanExporter):
|
|
|
32
32
|
self.log_out(self.formatter(span))
|
|
33
33
|
return SpanExportResult.SUCCESS
|
|
34
34
|
|
|
35
|
+
|
|
35
36
|
def get_log_out() -> Callable:
|
|
36
37
|
level_names_mapping = {
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
38
|
+
"CRITICAL": logging.CRITICAL,
|
|
39
|
+
"FATAL": logging.FATAL,
|
|
40
|
+
"ERROR": logging.ERROR,
|
|
41
|
+
"WARN": logging.WARNING,
|
|
42
|
+
"WARNING": logging.WARNING,
|
|
43
|
+
"INFO": logging.INFO,
|
|
44
|
+
"DEBUG": logging.DEBUG,
|
|
45
|
+
"NOTSET": logging.NOTSET,
|
|
45
46
|
}
|
|
46
47
|
log_level = os.getenv("OTEL_LOG_LEVEL", "DEBUG").upper()
|
|
47
48
|
log_level_int = level_names_mapping.get(log_level, logging.DEBUG)
|
|
48
49
|
return lambda message: logger.log(log_level_int, message)
|
|
49
50
|
|
|
51
|
+
|
|
50
52
|
@dataclass
|
|
51
53
|
class OtelHandler:
|
|
52
54
|
otel_endpoint: Optional[str] = None
|
|
@@ -335,7 +335,7 @@ class ElasticsearchUploadStager(UploadStager):
|
|
|
335
335
|
|
|
336
336
|
def conform_dict(self, element_dict: dict, file_data: FileData) -> dict:
|
|
337
337
|
data = element_dict.copy()
|
|
338
|
-
# when _op_type is not specified, it defaults to "index":
|
|
338
|
+
# when _op_type is not specified, it defaults to "index":
|
|
339
339
|
# Overwrites if exists, creates if not.
|
|
340
340
|
resp = {
|
|
341
341
|
"_index": self.upload_stager_config.index_name,
|
|
@@ -385,7 +385,7 @@ class FsspecUploader(Uploader):
|
|
|
385
385
|
|
|
386
386
|
def __post_init__(self):
|
|
387
387
|
super().__post_init__()
|
|
388
|
-
# TODO
|
|
388
|
+
# TODO: Consider using `kw_only` instead
|
|
389
389
|
if not self.upload_config:
|
|
390
390
|
raise TypeError(
|
|
391
391
|
f"{self.__class__.__name__}.__init__() "
|
|
@@ -11,12 +11,12 @@ from pathlib import Path
|
|
|
11
11
|
def mkdir_concurrent_safe(path: Path) -> None:
|
|
12
12
|
"""
|
|
13
13
|
Create directory safely in concurrent environments, handling race conditions.
|
|
14
|
-
|
|
14
|
+
|
|
15
15
|
This addresses the issue where Path.mkdir(parents=True, exist_ok=True) can still
|
|
16
|
-
raise FileExistsError when multiple processes attempt to create overlapping
|
|
16
|
+
raise FileExistsError when multiple processes attempt to create overlapping
|
|
17
17
|
directory structures simultaneously. In this codebase, this occurs when multiple
|
|
18
18
|
files are being downloaded in parallel and archive extraction is happening in parallel.
|
|
19
|
-
|
|
19
|
+
|
|
20
20
|
Related: https://github.com/python/cpython/pull/112966/files
|
|
21
21
|
Python core team used the same approach to fix zipfile race conditions.
|
|
22
22
|
"""
|
|
@@ -24,4 +24,4 @@ def mkdir_concurrent_safe(path: Path) -> None:
|
|
|
24
24
|
path.mkdir(parents=True, exist_ok=True)
|
|
25
25
|
except FileExistsError:
|
|
26
26
|
if not (path.exists() and path.is_dir()):
|
|
27
|
-
raise
|
|
27
|
+
raise
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: unstructured_ingest
|
|
3
|
-
Version: 1.1.3
|
|
3
|
+
Version: 1.2.0
|
|
4
4
|
Summary: Local ETL data pipeline to get data RAG ready
|
|
5
5
|
Author-email: Unstructured Technologies <devops@unstructuredai.io>
|
|
6
6
|
License-Expression: Apache-2.0
|
|
@@ -12,12 +12,11 @@ Classifier: Intended Audience :: Science/Research
|
|
|
12
12
|
Classifier: License :: OSI Approved :: Apache Software License
|
|
13
13
|
Classifier: Operating System :: OS Independent
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
|
-
Classifier: Programming Language :: Python :: 3.9
|
|
16
15
|
Classifier: Programming Language :: Python :: 3.10
|
|
17
16
|
Classifier: Programming Language :: Python :: 3.11
|
|
18
17
|
Classifier: Programming Language :: Python :: 3.12
|
|
19
18
|
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
-
Requires-Python: <3.13,>=3.9
|
|
19
|
+
Requires-Python: <3.13,>=3.10
|
|
21
20
|
Requires-Dist: certifi>=2025.7.14
|
|
22
21
|
Requires-Dist: click
|
|
23
22
|
Requires-Dist: opentelemetry-sdk
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
unstructured_ingest/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
|
|
2
|
-
unstructured_ingest/__version__.py,sha256=
|
|
2
|
+
unstructured_ingest/__version__.py,sha256=hYDudzvEd17dAmuuGVXnxBsg1JfgQBVtuuoFz06SeYo,42
|
|
3
3
|
unstructured_ingest/error.py,sha256=qDncnJgbf5ils956RcO2CGlAKYDT5OaEM9Clv1JVTNc,1448
|
|
4
4
|
unstructured_ingest/errors_v2.py,sha256=9RuRCi7lbDxCguDz07y5RiHoQiFIOWwOD7xqzJ2B3Yw,436
|
|
5
5
|
unstructured_ingest/logger.py,sha256=7e_7UeK6hVOd5BQ6i9NzRUAPCS_DF839Y8TjUDywraY,1428
|
|
6
6
|
unstructured_ingest/main.py,sha256=82G_7eG4PNhc_xIqj4Y_sFbDV9VI-nwSfsfJQMzovMk,169
|
|
7
|
-
unstructured_ingest/otel.py,sha256=
|
|
7
|
+
unstructured_ingest/otel.py,sha256=YalGjdPPuZKRoIfGU-O62-EXkTvn5BWVyu-ztRflax4,4671
|
|
8
8
|
unstructured_ingest/unstructured_api.py,sha256=4e2ZNWIihk0eje4R3ZQ0NOYNbmMZDv_O-rnJo94kaGE,5127
|
|
9
9
|
unstructured_ingest/cli/README.md,sha256=lfsXY2jOO__OuDYcIs8N0yLhZWzrSQ_dyXbSFtEMlQ8,1504
|
|
10
10
|
unstructured_ingest/cli/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -103,13 +103,13 @@ unstructured_ingest/processes/connectors/duckdb/base.py,sha256=bTLhilg6mgERNCpee
|
|
|
103
103
|
unstructured_ingest/processes/connectors/duckdb/duckdb.py,sha256=jsmibTd_yvYzkCT05HhCJvplyobtjfNILC3zyTuCcVY,4464
|
|
104
104
|
unstructured_ingest/processes/connectors/duckdb/motherduck.py,sha256=Atr2MjJQGFGWh5aeiQsLpUbFw-aCZH-ABI1LprDh5VI,4727
|
|
105
105
|
unstructured_ingest/processes/connectors/elasticsearch/__init__.py,sha256=M8mmBWoP6J5R3hxg6BQUMexYlTUxUxdBoIcjUop8yt8,826
|
|
106
|
-
unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=
|
|
106
|
+
unstructured_ingest/processes/connectors/elasticsearch/elasticsearch.py,sha256=nh1VptiryyudZcRDiml38wikDcGIHVoXQfPKR9_7ugI,18772
|
|
107
107
|
unstructured_ingest/processes/connectors/elasticsearch/opensearch.py,sha256=wggHvw8h-X0-3WPNxj9rt2xkrE7Pv7CV0B0KzTMzBB4,6944
|
|
108
108
|
unstructured_ingest/processes/connectors/fsspec/__init__.py,sha256=3HTdw4L4mdN4W8UX0jQbMxBg0ZfITPbEXU7Bwdo1BfI,1843
|
|
109
109
|
unstructured_ingest/processes/connectors/fsspec/azure.py,sha256=31VNiG5YnXfhrFX7QJ2O1ubeWHxbe1sYVIztefbscAQ,7148
|
|
110
110
|
unstructured_ingest/processes/connectors/fsspec/box.py,sha256=1gLS7xR2vbjgKBrQ4ZpI1fKTsJuIDfXuAzx_a4FzxG4,5873
|
|
111
111
|
unstructured_ingest/processes/connectors/fsspec/dropbox.py,sha256=HwwKjQmjM7yFk9Esh_F20xDisRPXGUkFduzaasByRDE,8355
|
|
112
|
-
unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=
|
|
112
|
+
unstructured_ingest/processes/connectors/fsspec/fsspec.py,sha256=fA9jtXnr1P4wr8VBpZ1Lx9TsZzH-FDqHoBvPUH0DnWk,17827
|
|
113
113
|
unstructured_ingest/processes/connectors/fsspec/gcs.py,sha256=ouxISCKpZTAj3T6pWGYbASu93wytJjl5WSICvQcrgfE,7172
|
|
114
114
|
unstructured_ingest/processes/connectors/fsspec/s3.py,sha256=P5nd3hamhLFO3l5nV3lMuIxHtb_rZYFP4F6q_py3xpc,7492
|
|
115
115
|
unstructured_ingest/processes/connectors/fsspec/sftp.py,sha256=pR_a2SgLjt8ffNkariHrPB1E0HVSTj5h3pt7KxTU3TI,6371
|
|
@@ -228,15 +228,15 @@ unstructured_ingest/utils/compression.py,sha256=PPC-ys3qEAtELf6-irhp8v8M634pFFCJ
|
|
|
228
228
|
unstructured_ingest/utils/constants.py,sha256=pDspTYz-nEojHBqrZNfssGEiujmVa02pIWL63PQP9sU,103
|
|
229
229
|
unstructured_ingest/utils/data_prep.py,sha256=yqrv7x_nlj0y3uaN0m0Bnsekb7VIQnwABWPa24KU5QI,7426
|
|
230
230
|
unstructured_ingest/utils/dep_check.py,sha256=SXXcUna2H0RtxA6j1S2NGkvQa9JP2DujWhmyBa7776Y,2400
|
|
231
|
-
unstructured_ingest/utils/filesystem.py,sha256=
|
|
231
|
+
unstructured_ingest/utils/filesystem.py,sha256=QY-On6RNVj9IK524eYQLRkP6oTZ0PuQ8WKqnWOV_tXU,1028
|
|
232
232
|
unstructured_ingest/utils/html.py,sha256=lm5lVYhVl7ztntquxzMLVQ8EmK7wkvYgNvlIuHnenoM,6865
|
|
233
233
|
unstructured_ingest/utils/ndjson.py,sha256=nz8VUOPEgAFdhaDOpuveknvCU4x82fVwqE01qAbElH0,1201
|
|
234
234
|
unstructured_ingest/utils/pydantic_models.py,sha256=BT_j15e4rX40wQbt8LUXbqfPhA3rJn1PHTI_G_A_EHY,1720
|
|
235
235
|
unstructured_ingest/utils/string_and_date_utils.py,sha256=oXOI6rxXq-8ncbk7EoJK0WCcTXWj75EzKl8pfQMID3U,2522
|
|
236
236
|
unstructured_ingest/utils/table.py,sha256=WZechczgVFvlodUWFcsnCGvBNh1xRm6hr0VbJTPxKAc,3669
|
|
237
237
|
unstructured_ingest/utils/tls.py,sha256=Ra8Mii1F4VqErRreg76PBI0eAqPBC009l0sSHa8FdnA,448
|
|
238
|
-
unstructured_ingest-1.
|
|
239
|
-
unstructured_ingest-1.
|
|
240
|
-
unstructured_ingest-1.
|
|
241
|
-
unstructured_ingest-1.
|
|
242
|
-
unstructured_ingest-1.
|
|
238
|
+
unstructured_ingest-1.2.0.dist-info/METADATA,sha256=3D67Gk9trwGIVvMh0oSUDx_aJDsYCHD0qwL1VB9ZoYw,8826
|
|
239
|
+
unstructured_ingest-1.2.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
240
|
+
unstructured_ingest-1.2.0.dist-info/entry_points.txt,sha256=gUAAFnjFPnBgThJSEbw0N5ZjxtaKlT1s9e05_arQrNw,70
|
|
241
|
+
unstructured_ingest-1.2.0.dist-info/licenses/LICENSE.md,sha256=SxkKP_62uIAKb9mb1eH7FH4Kn2aYT09fgjKpJt5PyTk,11360
|
|
242
|
+
unstructured_ingest-1.2.0.dist-info/RECORD,,
|
|
File without changes
|
{unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/entry_points.txt
RENAMED
|
File without changes
|
{unstructured_ingest-1.1.3.dist-info → unstructured_ingest-1.2.0.dist-info}/licenses/LICENSE.md
RENAMED
|
File without changes
|