unstructured-ingest 0.0.25__py3-none-any.whl → 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of unstructured-ingest might be problematic. Click here for more details.

Files changed (86) hide show
  1. test/__init__.py +0 -0
  2. test/integration/__init__.py +0 -0
  3. test/integration/chunkers/__init__.py +0 -0
  4. test/integration/chunkers/test_chunkers.py +42 -0
  5. test/integration/connectors/__init__.py +0 -0
  6. test/integration/connectors/conftest.py +15 -0
  7. test/integration/connectors/databricks_tests/__init__.py +0 -0
  8. test/integration/connectors/databricks_tests/test_volumes_native.py +165 -0
  9. test/integration/connectors/sql/__init__.py +0 -0
  10. test/integration/connectors/sql/test_postgres.py +178 -0
  11. test/integration/connectors/sql/test_sqlite.py +151 -0
  12. test/integration/connectors/test_s3.py +152 -0
  13. test/integration/connectors/utils/__init__.py +0 -0
  14. test/integration/connectors/utils/constants.py +7 -0
  15. test/integration/connectors/utils/docker_compose.py +44 -0
  16. test/integration/connectors/utils/validation.py +203 -0
  17. test/integration/embedders/__init__.py +0 -0
  18. test/integration/embedders/conftest.py +13 -0
  19. test/integration/embedders/test_bedrock.py +49 -0
  20. test/integration/embedders/test_huggingface.py +26 -0
  21. test/integration/embedders/test_mixedbread.py +47 -0
  22. test/integration/embedders/test_octoai.py +41 -0
  23. test/integration/embedders/test_openai.py +41 -0
  24. test/integration/embedders/test_vertexai.py +41 -0
  25. test/integration/embedders/test_voyageai.py +41 -0
  26. test/integration/embedders/togetherai.py +43 -0
  27. test/integration/embedders/utils.py +44 -0
  28. test/integration/partitioners/__init__.py +0 -0
  29. test/integration/partitioners/test_partitioner.py +75 -0
  30. test/integration/utils.py +15 -0
  31. test/unit/__init__.py +0 -0
  32. test/unit/embed/__init__.py +0 -0
  33. test/unit/embed/test_mixedbreadai.py +41 -0
  34. test/unit/embed/test_octoai.py +20 -0
  35. test/unit/embed/test_openai.py +20 -0
  36. test/unit/embed/test_vertexai.py +25 -0
  37. test/unit/embed/test_voyageai.py +24 -0
  38. test/unit/test_chunking_utils.py +36 -0
  39. test/unit/test_error.py +27 -0
  40. test/unit/test_interfaces.py +280 -0
  41. test/unit/test_interfaces_v2.py +26 -0
  42. test/unit/test_logger.py +78 -0
  43. test/unit/test_utils.py +164 -0
  44. test/unit/test_utils_v2.py +82 -0
  45. unstructured_ingest/__version__.py +1 -1
  46. unstructured_ingest/cli/interfaces.py +2 -2
  47. unstructured_ingest/connector/notion/types/block.py +1 -0
  48. unstructured_ingest/connector/notion/types/database.py +1 -0
  49. unstructured_ingest/connector/notion/types/page.py +1 -0
  50. unstructured_ingest/embed/bedrock.py +0 -20
  51. unstructured_ingest/embed/huggingface.py +0 -21
  52. unstructured_ingest/embed/interfaces.py +29 -3
  53. unstructured_ingest/embed/mixedbreadai.py +0 -36
  54. unstructured_ingest/embed/octoai.py +2 -24
  55. unstructured_ingest/embed/openai.py +0 -20
  56. unstructured_ingest/embed/togetherai.py +40 -0
  57. unstructured_ingest/embed/vertexai.py +0 -20
  58. unstructured_ingest/embed/voyageai.py +1 -24
  59. unstructured_ingest/interfaces.py +1 -1
  60. unstructured_ingest/v2/cli/utils/click.py +21 -2
  61. unstructured_ingest/v2/interfaces/connector.py +22 -2
  62. unstructured_ingest/v2/interfaces/downloader.py +1 -0
  63. unstructured_ingest/v2/processes/chunker.py +1 -1
  64. unstructured_ingest/v2/processes/connectors/__init__.py +5 -18
  65. unstructured_ingest/v2/processes/connectors/databricks/__init__.py +52 -0
  66. unstructured_ingest/v2/processes/connectors/databricks/volumes.py +175 -0
  67. unstructured_ingest/v2/processes/connectors/databricks/volumes_aws.py +87 -0
  68. unstructured_ingest/v2/processes/connectors/databricks/volumes_azure.py +102 -0
  69. unstructured_ingest/v2/processes/connectors/databricks/volumes_gcp.py +85 -0
  70. unstructured_ingest/v2/processes/connectors/databricks/volumes_native.py +86 -0
  71. unstructured_ingest/v2/processes/connectors/fsspec/fsspec.py +17 -0
  72. unstructured_ingest/v2/processes/connectors/kdbai.py +14 -6
  73. unstructured_ingest/v2/processes/connectors/mongodb.py +223 -3
  74. unstructured_ingest/v2/processes/connectors/sql/__init__.py +13 -0
  75. unstructured_ingest/v2/processes/connectors/sql/postgres.py +177 -0
  76. unstructured_ingest/v2/processes/connectors/sql/sql.py +310 -0
  77. unstructured_ingest/v2/processes/connectors/sql/sqlite.py +172 -0
  78. unstructured_ingest/v2/processes/embedder.py +13 -0
  79. unstructured_ingest/v2/processes/partitioner.py +2 -1
  80. {unstructured_ingest-0.0.25.dist-info → unstructured_ingest-0.1.1.dist-info}/METADATA +16 -14
  81. {unstructured_ingest-0.0.25.dist-info → unstructured_ingest-0.1.1.dist-info}/RECORD +85 -31
  82. {unstructured_ingest-0.0.25.dist-info → unstructured_ingest-0.1.1.dist-info}/top_level.txt +1 -0
  83. unstructured_ingest/v2/processes/connectors/sql.py +0 -275
  84. {unstructured_ingest-0.0.25.dist-info → unstructured_ingest-0.1.1.dist-info}/LICENSE.md +0 -0
  85. {unstructured_ingest-0.0.25.dist-info → unstructured_ingest-0.1.1.dist-info}/WHEEL +0 -0
  86. {unstructured_ingest-0.0.25.dist-info → unstructured_ingest-0.1.1.dist-info}/entry_points.txt +0 -0
@@ -0,0 +1,41 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from test.integration.embedders.utils import validate_embedding_output, validate_raw_embedder
6
+ from test.integration.utils import requires_env
7
+ from unstructured_ingest.embed.voyageai import VoyageAIEmbeddingConfig, VoyageAIEmbeddingEncoder
8
+ from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
9
+
10
# Name of the environment variable that holds the VoyageAI API key.
API_KEY = "VOYAGEAI_API_KEY"


def get_api_key() -> str:
    """Return the value of the environment variable named by ``API_KEY``.

    Raises:
        AssertionError: if the variable is unset or empty.
    """
    api_key = os.getenv(API_KEY)
    assert api_key, f"Environment variable not set or empty: {API_KEY}"
    return api_key
17
+
18
+
19
@requires_env(API_KEY)
def test_voyageai_embedder(embedder_file: Path):
    """Run the ``Embedder`` process with the ``voyageai`` provider and validate its output.

    The elements stored in ``embedder_file`` are loaded again after the run and compared
    against the embedder results with ``validate_embedding_output``.
    """
    embedder = Embedder(
        config=EmbedderConfig(embedding_provider="voyageai", embedding_api_key=get_api_key())
    )
    results = embedder.run(elements_filepath=embedder_file)
    assert results
    with embedder_file.open("r") as f:
        original_elements = json.load(f)
    validate_embedding_output(original_elements=original_elements, output_elements=results)
29
+
30
+
31
@requires_env(API_KEY)
def test_raw_voyageai_embedder(embedder_file: Path):
    """Exercise ``VoyageAIEmbeddingEncoder`` directly, expecting dimensions of ``(1024,)``."""
    config = VoyageAIEmbeddingConfig(api_key=get_api_key())
    validate_raw_embedder(
        embedder=VoyageAIEmbeddingEncoder(config=config),
        embedder_file=embedder_file,
        expected_dimensions=(1024,),
    )
@@ -0,0 +1,43 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ from test.integration.embedders.utils import validate_embedding_output, validate_raw_embedder
6
+ from test.integration.utils import requires_env
7
+ from unstructured_ingest.embed.togetherai import (
8
+ TogetherAIEmbeddingConfig,
9
+ TogetherAIEmbeddingEncoder,
10
+ )
11
+ from unstructured_ingest.v2.processes.embedder import Embedder, EmbedderConfig
12
+
13
# Name of the environment variable that holds the TogetherAI API key.
API_KEY = "TOGETHERAI_API_KEY"


def get_api_key() -> str:
    """Return the value of the environment variable named by ``API_KEY``.

    Raises:
        AssertionError: if the variable is unset or empty.
    """
    api_key = os.getenv(API_KEY)
    assert api_key, f"Environment variable not set or empty: {API_KEY}"
    return api_key
20
+
21
+
22
@requires_env(API_KEY)
def test_togetherai_embedder(embedder_file: Path):
    """Run the ``Embedder`` process with the ``togetherai`` provider and validate its output.

    The elements stored in ``embedder_file`` are loaded again after the run and compared
    against the embedder results with ``validate_embedding_output``.
    """
    embedder = Embedder(
        config=EmbedderConfig(embedding_provider="togetherai", embedding_api_key=get_api_key())
    )
    results = embedder.run(elements_filepath=embedder_file)
    assert results
    with embedder_file.open("r") as f:
        original_elements = json.load(f)
    validate_embedding_output(original_elements=original_elements, output_elements=results)
32
+
33
+
34
@requires_env(API_KEY)
def test_raw_togetherai_embedder(embedder_file: Path):
    """Exercise ``TogetherAIEmbeddingEncoder`` directly.

    Expects dimensions of ``(768,)`` and an encoder that does not report unit vectors.
    """
    config = TogetherAIEmbeddingConfig(api_key=get_api_key())
    validate_raw_embedder(
        embedder=TogetherAIEmbeddingEncoder(config=config),
        embedder_file=embedder_file,
        expected_dimensions=(768,),
        expected_is_unit_vector=False,
    )
@@ -0,0 +1,44 @@
1
+ import json
2
+ from pathlib import Path
3
+ from typing import Optional
4
+
5
+ from unstructured_ingest.embed.interfaces import BaseEmbeddingEncoder
6
+
7
+
8
def validate_embedding_output(original_elements: list[dict], output_elements: list[dict]):
    """Assert that embedding added nothing but an ``embeddings`` entry to each element.

    Make sure the following characteristics are met:
    * The same number of elements are returned
    * For each element that had text, an embeddings entry was added in the output
    * Other than the embedding, nothing about the element was changed

    Note that ``embeddings`` is popped from every dict in ``output_elements``, so the
    caller's output dicts are modified by this check.

    Raises:
        AssertionError: if any of the characteristics above does not hold.
    """
    assert len(original_elements) == len(
        output_elements
    ), f"expected {len(original_elements)} elements, got {len(output_elements)}"
    pairs = zip(original_elements, output_elements)
    for index, (original_element, output_element) in enumerate(pairs):
        if original_element.get("text"):
            assert output_element.get(
                "embeddings", None
            ), f"element {index} has text but no embeddings"
        # Remove the embedding in place so the rest of the element can be compared directly.
        output_element.pop("embeddings", None)
        assert (
            original_element == output_element
        ), f"element {index} differs from the original beyond its embeddings"
21
+
22
+
23
def validate_raw_embedder(
    embedder: BaseEmbeddingEncoder,
    embedder_file: Path,
    expected_dimensions: Optional[tuple[int, ...]] = None,
    expected_is_unit_vector: bool = True,
):
    """Check an embedding encoder against the elements stored in ``embedder_file``.

    Args:
        embedder: encoder under test.
        embedder_file: JSON file containing a list of element dicts, each with a ``text`` key.
        expected_dimensions: when given, must equal ``embedder.num_of_dimensions``.
        expected_is_unit_vector: must equal ``embedder.is_unit_vector``.

    Raises:
        AssertionError: if the file holds no elements or any expectation is not met.
        KeyError: if an element has no ``text`` key.
    """
    with open(embedder_file) as f:
        elements = json.load(f)
    # Fail with a clear message instead of an IndexError when there is nothing to embed.
    assert elements, f"no elements found in {embedder_file}"
    all_text = [element["text"] for element in elements]
    single_text = all_text[0]

    num_of_dimensions = embedder.num_of_dimensions
    if expected_dimensions:
        assert (
            num_of_dimensions == expected_dimensions
        ), f"number of dimensions {num_of_dimensions} didn't match expected: {expected_dimensions}"
    assert embedder.is_unit_vector == expected_is_unit_vector

    # A single query embedding must be as long as the first reported dimension.
    single_embedding = embedder.embed_query(query=single_text)
    assert len(single_embedding) == num_of_dimensions[0]

    embedded_elements = embedder.embed_documents(elements=elements)
    validate_embedding_output(original_elements=elements, output_elements=embedded_elements)
File without changes
@@ -0,0 +1,75 @@
1
+ import json
2
+ import os
3
+ from pathlib import Path
4
+
5
+ import pytest
6
+ from unstructured_client.models.errors.sdkerror import SDKError
7
+
8
+ from test.integration.utils import requires_env
9
+ from unstructured_ingest.v2.processes.partitioner import Partitioner, PartitionerConfig
10
+
11
# Directory holding this test module; files directly under "assets" are the partition inputs.
int_test_dir = Path(__file__).parent
assets_dir = int_test_dir / "assets"

# File suffixes that mark an asset as an image.
_image_suffixes = (".jpg", ".png", ".tif")

# iterdir() yields entries in arbitrary order; sort so parametrized cases are collected in a
# stable order on every run.
all_partition_files = sorted(path for path in assets_dir.iterdir() if path.is_file())
non_image_partition_files = [
    path for path in all_partition_files if path.suffix not in _image_suffixes
]
image_partition_files = [path for path in all_partition_files if path.suffix in _image_suffixes]
21
+
22
+
23
@pytest.mark.parametrize(
    "partition_file", all_partition_files, ids=[path.name for path in all_partition_files]
)
@requires_env("UNSTRUCTURED_API_KEY", "UNSTRUCTURED_API_URL")
@pytest.mark.asyncio
async def test_partitioner_api_hi_res(partition_file: Path):
    """Partition each asset through the API with the ``hi_res`` strategy.

    The returned elements are written to ``results/<file name>.json`` next to this module
    before the non-empty assertion runs.
    """
    partitioner = Partitioner(
        config=PartitionerConfig(
            strategy="hi_res",
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
        )
    )
    results = await partitioner.run_async(filename=partition_file)

    results_dir = int_test_dir / "results"
    results_dir.mkdir(exist_ok=True)
    results_path = results_dir / f"{partition_file.name}.json"
    with results_path.open("w") as f:
        json.dump(results, f, indent=2)

    assert results
42
+
43
+
44
@pytest.mark.parametrize(
    "partition_file",
    non_image_partition_files,
    ids=[path.name for path in non_image_partition_files],
)
@requires_env("UNSTRUCTURED_API_KEY", "UNSTRUCTURED_API_URL")
@pytest.mark.asyncio
async def test_partitioner_api_fast(partition_file: Path):
    """Partition each non-image asset through the API with the ``fast`` strategy."""
    partitioner = Partitioner(
        config=PartitionerConfig(
            strategy="fast",
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
        )
    )
    results = await partitioner.run_async(filename=partition_file)
    assert results
60
+
61
+
62
@pytest.mark.parametrize(
    "partition_file", image_partition_files, ids=[path.name for path in image_partition_files]
)
@requires_env("UNSTRUCTURED_API_KEY", "UNSTRUCTURED_API_URL")
@pytest.mark.asyncio
async def test_partitioner_api_fast_error(partition_file: Path):
    """Expect ``SDKError`` when an image asset is partitioned with the ``fast`` strategy."""
    partitioner = Partitioner(
        config=PartitionerConfig(
            strategy="fast",
            partition_by_api=True,
            api_key=os.getenv("UNSTRUCTURED_API_KEY"),
            partition_endpoint=os.getenv("UNSTRUCTURED_API_URL"),
        )
    )
    with pytest.raises(SDKError):
        await partitioner.run_async(filename=partition_file)
@@ -0,0 +1,15 @@
1
+ import os
2
+
3
+ import pytest
4
+
5
+
6
def requires_env(*envs):
    """Return a ``pytest.mark.skipif`` marker that skips unless every variable in ``envs`` is set.

    A variable that is present but empty counts as missing, so a test guarded by this marker
    is skipped rather than run with a blank value.

    Args:
        *envs: names of the required environment variables.
    """
    if len(envs) == 1:
        reason = f"Environment variable not set: {envs[0]}"
    else:
        reason = "All required environment variables not set: {}".format(", ".join(envs))
    any_missing = not all(os.environ.get(env) for env in envs)
    return pytest.mark.skipif(any_missing, reason=reason)
test/unit/__init__.py ADDED
File without changes
File without changes
@@ -0,0 +1,41 @@
1
+ from unstructured_ingest.embed.mixedbreadai import (
2
+ MixedbreadAIEmbeddingConfig,
3
+ MixedbreadAIEmbeddingEncoder,
4
+ )
5
+
6
+
7
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that ``embed_documents`` keeps each element's text and adds ``embeddings``.

    The Mixedbread client is replaced by a mock whose ``embeddings`` call returns one
    two-value embedding per input text.
    """

    # Parameter names are kept exactly as in the original mock (``input`` shadows the builtin).
    def mock_embeddings(
        model,
        normalized,
        encoding_format,
        truncation_strategy,
        request_options,
        input,
    ):
        mock_response = mocker.MagicMock()
        mock_response.data = [mocker.MagicMock(embedding=[i, i + 1]) for i in range(len(input))]
        return mock_response

    mock_client = mocker.MagicMock()
    mock_client.embeddings.side_effect = mock_embeddings

    # Mock get_client to return our mock_client
    mocker.patch.object(MixedbreadAIEmbeddingConfig, "get_client", return_value=mock_client)

    config = MixedbreadAIEmbeddingConfig(
        api_key="api_key", model_name="mixedbread-ai/mxbai-embed-large-v1"
    )
    encoder = MixedbreadAIEmbeddingEncoder(config=config)

    raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]
    elements = encoder.embed_documents(elements=raw_elements)

    assert len(elements) == 2
    assert elements[0]["text"] == "This is sentence 1"
    assert elements[1]["text"] == "This is sentence 2"
    assert elements[0]["embeddings"] is not None
    assert elements[1]["embeddings"] is not None
@@ -0,0 +1,20 @@
1
+ from unstructured_ingest.embed.octoai import OctoAiEmbeddingConfig, OctoAIEmbeddingEncoder
2
+
3
+
4
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that ``OctoAIEmbeddingEncoder.embed_documents`` keeps each element's text."""
    # Mocked client with the desired behavior for embed_documents
    mock_client = mocker.MagicMock()
    mock_client.embed_documents.return_value = [1, 2]

    # Mock get_client to return our mock_client
    mocker.patch.object(OctoAiEmbeddingConfig, "get_client", return_value=mock_client)

    config = OctoAiEmbeddingConfig(api_key="api_key")
    encoder = OctoAIEmbeddingEncoder(config=config)
    raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]

    elements = encoder.embed_documents(elements=raw_elements)

    assert len(elements) == 2
    assert elements[0]["text"] == "This is sentence 1"
    assert elements[1]["text"] == "This is sentence 2"
@@ -0,0 +1,20 @@
1
+ from unstructured_ingest.embed.openai import OpenAIEmbeddingConfig, OpenAIEmbeddingEncoder
2
+
3
+
4
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that ``OpenAIEmbeddingEncoder.embed_documents`` keeps each element's text."""
    # Mocked client with the desired behavior for embed_documents
    mock_client = mocker.MagicMock()
    mock_client.embed_documents.return_value = [1, 2]

    # Mock get_client to return our mock_client
    mocker.patch.object(OpenAIEmbeddingConfig, "get_client", return_value=mock_client)

    config = OpenAIEmbeddingConfig(api_key="api_key")
    encoder = OpenAIEmbeddingEncoder(config=config)
    raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]

    elements = encoder.embed_documents(elements=raw_elements)

    assert len(elements) == 2
    assert elements[0]["text"] == "This is sentence 1"
    assert elements[1]["text"] == "This is sentence 2"
@@ -0,0 +1,25 @@
1
+ from unstructured_ingest.embed.vertexai import VertexAIEmbeddingConfig, VertexAIEmbeddingEncoder
2
+
3
+
4
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that ``VertexAIEmbeddingEncoder.embed_documents`` keeps each element's text."""
    # Mocked responses, each exposing a ``values`` attribute, returned by get_embeddings
    mock_responses = []
    for i in [1, 2]:
        mock_response = mocker.Mock()
        mocker.patch.object(mock_response, "values", i)
        mock_responses.append(mock_response)

    mock_client = mocker.MagicMock()
    mock_client.get_embeddings.return_value = mock_responses

    # Mock get_client to return our mock_client
    mocker.patch.object(VertexAIEmbeddingConfig, "get_client", return_value=mock_client)

    config = VertexAIEmbeddingConfig(api_key={"api_key": "value"})
    encoder = VertexAIEmbeddingEncoder(config=config)
    raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]

    elements = encoder.embed_documents(elements=raw_elements)

    assert len(elements) == 2
    assert elements[0]["text"] == "This is sentence 1"
    assert elements[1]["text"] == "This is sentence 2"
@@ -0,0 +1,24 @@
1
+ from unstructured_ingest.embed.voyageai import VoyageAIEmbeddingConfig, VoyageAIEmbeddingEncoder
2
+
3
+
4
def test_embed_documents_does_not_break_element_to_dict(mocker):
    """Check that ``VoyageAIEmbeddingEncoder.embed_documents`` keeps each element's text."""
    # Mocked response whose ``embeddings`` attribute is returned by the client's embed call
    mock_response = mocker.MagicMock()
    mocker.patch.object(mock_response, "embeddings", [1, 2])
    mock_client = mocker.MagicMock()
    mock_client.embed.return_value = mock_response

    # Mock get_client to return our mock_client
    mocker.patch.object(VoyageAIEmbeddingConfig, "get_client", return_value=mock_client)

    config = VoyageAIEmbeddingConfig(api_key="api_key", model_name="voyage-law-2")
    encoder = VoyageAIEmbeddingEncoder(config=config)
    raw_elements = [{"text": f"This is sentence {i+1}"} for i in range(2)]

    elements = encoder.embed_documents(elements=raw_elements)

    assert len(elements) == 2
    assert elements[0]["text"] == "This is sentence 1"
    assert elements[1]["text"] == "This is sentence 2"
@@ -0,0 +1,36 @@
1
+ from pathlib import Path
2
+
3
+ import pytest
4
+ from unstructured.chunking import dispatch
5
+ from unstructured.documents.elements import assign_and_map_hash_ids
6
+ from unstructured.partition.auto import partition
7
+
8
+ from unstructured_ingest.utils.chunking import (
9
+ assign_and_map_hash_ids as new_assign_and_map_hash_ids,
10
+ )
11
+
12
# Absolute path of this test module.
test_file_path = Path(__file__).resolve()
# Two directory levels above the directory that holds this module.
project_root = test_file_path.parents[2]
# Directory the test below reads its sample document from.
docs_path = project_root / "example-docs"
15
+
16
+
17
@pytest.mark.parametrize(
    "chunking_strategy",
    ["basic", "by_title"],
)
def test_assign_and_map_hash_ids(chunking_strategy):
    """Compare the dict-based ``assign_and_map_hash_ids`` with the ``unstructured`` version.

    Make sure the new logic working on dict content matches the results if using the
    unstructured version, for the same chunked elements.
    """
    file_path = docs_path / "book-war-and-peace-1p.txt"
    elements = partition(filename=str(file_path.resolve()), strategy="fast")
    chunked_elements = dispatch.chunk(elements=elements, chunking_strategy=chunking_strategy)

    # Take the dict snapshot before the reference implementation runs. A list.copy() is
    # shallow and would share the Element objects, so it would observe any in-place changes
    # the reference implementation makes (presumably the hashed ids; verify against unstructured).
    new_chunked_elements_dicts = [e.to_dict() for e in chunked_elements]

    hashed_chunked_elements = assign_and_map_hash_ids(chunked_elements)
    og_chunked_elements_dicts = [e.to_dict() for e in hashed_chunked_elements]

    new_chunked_elements_dicts = new_assign_and_map_hash_ids(new_chunked_elements_dicts)

    # zip() stops at the shorter input, so check the lengths explicitly first.
    assert len(og_chunked_elements_dicts) == len(new_chunked_elements_dicts)
    for e1, e2 in zip(og_chunked_elements_dicts, new_chunked_elements_dicts):
        assert e1 == e2
@@ -0,0 +1,27 @@
1
+ import pytest
2
+
3
+ from unstructured_ingest.error import (
4
+ DestinationConnectionError,
5
+ PartitionError,
6
+ SourceConnectionError,
7
+ )
8
+
9
+
10
@pytest.mark.parametrize(
    ("error_class", "exception_type", "error_message"),
    [
        (SourceConnectionError, ValueError, "Simulated connection error"),
        (DestinationConnectionError, RuntimeError, "Simulated connection error"),
        (PartitionError, FileNotFoundError, "Simulated partition error"),
    ],
)
def test_custom_error_decorator(error_class, exception_type, error_message):
    """Check that ``error_class.wrap`` re-raises an inner exception as ``error_class``.

    The message of the raised error must equal ``error_class.error_string`` formatted with
    the original message.
    """

    @error_class.wrap
    def simulate_error():
        raise exception_type(error_message)

    with pytest.raises(error_class) as context:
        simulate_error()

    expected_error_string = error_class.error_string.format(error_message)
    assert str(context.value) == expected_error_string