unstructured-ingest 0.5.9__py3-none-any.whl → 0.5.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of unstructured-ingest might be problematic. Click here for more details.
- test/integration/connectors/test_dropbox.py +151 -0
- test/integration/connectors/test_jira.py +67 -0
- test/unit/test_utils.py +27 -0
- test/unit/v2/connectors/test_jira.py +401 -0
- unstructured_ingest/__version__.py +1 -1
- unstructured_ingest/embed/openai.py +4 -3
- unstructured_ingest/utils/string_and_date_utils.py +25 -0
- unstructured_ingest/v2/processes/connectors/__init__.py +4 -0
- unstructured_ingest/v2/processes/connectors/confluence.py +2 -2
- unstructured_ingest/v2/processes/connectors/delta_table.py +2 -0
- unstructured_ingest/v2/processes/connectors/fsspec/dropbox.py +78 -15
- unstructured_ingest/v2/processes/connectors/jira.py +453 -0
- unstructured_ingest/v2/processes/partitioner.py +2 -5
- unstructured_ingest/v2/unstructured_api.py +7 -0
- {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.10.dist-info}/METADATA +16 -16
- {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.10.dist-info}/RECORD +20 -16
- {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.10.dist-info}/LICENSE.md +0 -0
- {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.10.dist-info}/WHEEL +0 -0
- {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.10.dist-info}/entry_points.txt +0 -0
- {unstructured_ingest-0.5.9.dist-info → unstructured_ingest-0.5.10.dist-info}/top_level.txt +0 -0
|
test/integration/connectors/test_dropbox.py
ADDED
@@ -0,0 +1,151 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
import requests
|
|
5
|
+
|
|
6
|
+
from test.integration.connectors.utils.constants import (
|
|
7
|
+
BLOB_STORAGE_TAG,
|
|
8
|
+
SOURCE_TAG,
|
|
9
|
+
)
|
|
10
|
+
from test.integration.connectors.utils.validation.source import (
|
|
11
|
+
SourceValidationConfigs,
|
|
12
|
+
source_connector_validation,
|
|
13
|
+
)
|
|
14
|
+
from test.integration.utils import requires_env
|
|
15
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.dropbox import (
|
|
16
|
+
CONNECTOR_TYPE as DROPBOX_CONNECTOR_TYPE,
|
|
17
|
+
)
|
|
18
|
+
from unstructured_ingest.v2.processes.connectors.fsspec.dropbox import (
|
|
19
|
+
DropboxAccessConfig,
|
|
20
|
+
DropboxConnectionConfig,
|
|
21
|
+
DropboxDownloader,
|
|
22
|
+
DropboxDownloaderConfig,
|
|
23
|
+
DropboxIndexer,
|
|
24
|
+
DropboxIndexerConfig,
|
|
25
|
+
)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@pytest.mark.asyncio
|
|
29
|
+
@pytest.mark.tags(DROPBOX_CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
|
|
30
|
+
@requires_env("DROPBOX_REFRESH_TOKEN", "DROPBOX_APP_KEY", "DROPBOX_APP_SECRET")
|
|
31
|
+
async def test_dropbox_source(temp_dir):
|
|
32
|
+
"""
|
|
33
|
+
Integration test for the Dropbox source connector.
|
|
34
|
+
|
|
35
|
+
This test indexes data from dropbox://test-input/ and downloads the resulting files,
|
|
36
|
+
then compares them to fixture data.
|
|
37
|
+
"""
|
|
38
|
+
refresh_token = os.getenv("DROPBOX_REFRESH_TOKEN")
|
|
39
|
+
app_key = os.getenv("DROPBOX_APP_KEY")
|
|
40
|
+
app_secret = os.getenv("DROPBOX_APP_SECRET")
|
|
41
|
+
|
|
42
|
+
connection_config = DropboxConnectionConfig(
|
|
43
|
+
access_config=DropboxAccessConfig(
|
|
44
|
+
refresh_token=refresh_token,
|
|
45
|
+
app_key=app_key,
|
|
46
|
+
app_secret=app_secret,
|
|
47
|
+
)
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
index_config = DropboxIndexerConfig(
|
|
51
|
+
recursive=True,
|
|
52
|
+
remote_url="dropbox://test-input",
|
|
53
|
+
)
|
|
54
|
+
downloader_config = DropboxDownloaderConfig(download_dir=temp_dir)
|
|
55
|
+
|
|
56
|
+
indexer = DropboxIndexer(
|
|
57
|
+
connection_config=connection_config,
|
|
58
|
+
index_config=index_config,
|
|
59
|
+
)
|
|
60
|
+
downloader = DropboxDownloader(
|
|
61
|
+
connection_config=connection_config,
|
|
62
|
+
download_config=downloader_config,
|
|
63
|
+
)
|
|
64
|
+
|
|
65
|
+
await source_connector_validation(
|
|
66
|
+
indexer=indexer,
|
|
67
|
+
downloader=downloader,
|
|
68
|
+
configs=SourceValidationConfigs(
|
|
69
|
+
test_id="dropbox",
|
|
70
|
+
expected_num_files=4,
|
|
71
|
+
validate_downloaded_files=True,
|
|
72
|
+
exclude_fields_extend=[
|
|
73
|
+
"metadata.date_created",
|
|
74
|
+
"metadata.date_modified",
|
|
75
|
+
],
|
|
76
|
+
),
|
|
77
|
+
)
|
|
78
|
+
|
|
79
|
+
|
|
80
|
+
@pytest.mark.asyncio
|
|
81
|
+
@pytest.mark.tags(DROPBOX_CONNECTOR_TYPE, SOURCE_TAG, BLOB_STORAGE_TAG)
|
|
82
|
+
@requires_env("DROPBOX_REFRESH_TOKEN", "DROPBOX_APP_KEY", "DROPBOX_APP_SECRET")
|
|
83
|
+
async def test_dropbox_short_lived_token_via_refresh(temp_dir):
|
|
84
|
+
"""
|
|
85
|
+
Demonstrates manually generating an access token from refresh credentials,
|
|
86
|
+
then passing ONLY the short-lived token to the Dropbox connector
|
|
87
|
+
(no app_key, app_secret, or refresh_token in the actual connection config).
|
|
88
|
+
|
|
89
|
+
This effectively mimics an external system that hands us a short-lived token.
|
|
90
|
+
"""
|
|
91
|
+
refresh_token = os.getenv("DROPBOX_REFRESH_TOKEN")
|
|
92
|
+
app_key = os.getenv("DROPBOX_APP_KEY")
|
|
93
|
+
app_secret = os.getenv("DROPBOX_APP_SECRET")
|
|
94
|
+
|
|
95
|
+
# Manually request a short-lived token from Dropbox's OAuth endpoint
|
|
96
|
+
# This call is basically what the connector code does internally,
|
|
97
|
+
# but we're doing it here in the test so we can pass only the short-lived token later.
|
|
98
|
+
response = requests.post(
|
|
99
|
+
"https://api.dropboxapi.com/oauth2/token",
|
|
100
|
+
data={
|
|
101
|
+
"grant_type": "refresh_token",
|
|
102
|
+
"refresh_token": refresh_token,
|
|
103
|
+
},
|
|
104
|
+
auth=(app_key, app_secret),
|
|
105
|
+
timeout=30, # seconds
|
|
106
|
+
)
|
|
107
|
+
response.raise_for_status()
|
|
108
|
+
data = response.json()
|
|
109
|
+
short_lived_token = data["access_token"]
|
|
110
|
+
print("Acquired an access token from Dropbox")
|
|
111
|
+
|
|
112
|
+
# Build connection config with ONLY the short-lived token
|
|
113
|
+
# We omit refresh_token, app_key, and app_secret to confirm that
|
|
114
|
+
# our connector can operate purely on the short-lived token.
|
|
115
|
+
connection_config = DropboxConnectionConfig(
|
|
116
|
+
access_config=DropboxAccessConfig(
|
|
117
|
+
token=short_lived_token,
|
|
118
|
+
app_key=None,
|
|
119
|
+
app_secret=None,
|
|
120
|
+
refresh_token=None,
|
|
121
|
+
)
|
|
122
|
+
)
|
|
123
|
+
|
|
124
|
+
index_config = DropboxIndexerConfig(
|
|
125
|
+
recursive=True,
|
|
126
|
+
remote_url="dropbox://test-input",
|
|
127
|
+
)
|
|
128
|
+
downloader_config = DropboxDownloaderConfig(download_dir=temp_dir)
|
|
129
|
+
|
|
130
|
+
indexer = DropboxIndexer(
|
|
131
|
+
connection_config=connection_config,
|
|
132
|
+
index_config=index_config,
|
|
133
|
+
)
|
|
134
|
+
downloader = DropboxDownloader(
|
|
135
|
+
connection_config=connection_config,
|
|
136
|
+
download_config=downloader_config,
|
|
137
|
+
)
|
|
138
|
+
|
|
139
|
+
await source_connector_validation(
|
|
140
|
+
indexer=indexer,
|
|
141
|
+
downloader=downloader,
|
|
142
|
+
configs=SourceValidationConfigs(
|
|
143
|
+
test_id="dropbox_short_lived_via_refresh",
|
|
144
|
+
expected_num_files=4,
|
|
145
|
+
validate_downloaded_files=True,
|
|
146
|
+
exclude_fields_extend=[
|
|
147
|
+
"metadata.date_created",
|
|
148
|
+
"metadata.date_modified",
|
|
149
|
+
],
|
|
150
|
+
),
|
|
151
|
+
)
|
|
test/integration/connectors/test_jira.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
1
|
+
import os
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
|
|
5
|
+
from test.integration.connectors.utils.constants import SOURCE_TAG, UNCATEGORIZED_TAG
|
|
6
|
+
from test.integration.connectors.utils.validation.source import (
|
|
7
|
+
SourceValidationConfigs,
|
|
8
|
+
source_connector_validation,
|
|
9
|
+
)
|
|
10
|
+
from test.integration.utils import requires_env
|
|
11
|
+
from unstructured_ingest.v2.processes.connectors.jira import (
|
|
12
|
+
CONNECTOR_TYPE,
|
|
13
|
+
JiraAccessConfig,
|
|
14
|
+
JiraConnectionConfig,
|
|
15
|
+
JiraDownloader,
|
|
16
|
+
JiraDownloaderConfig,
|
|
17
|
+
JiraIndexer,
|
|
18
|
+
JiraIndexerConfig,
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
@pytest.mark.asyncio
|
|
23
|
+
@pytest.mark.tags(CONNECTOR_TYPE, SOURCE_TAG, UNCATEGORIZED_TAG)
|
|
24
|
+
@requires_env("JIRA_INGEST_USER_EMAIL", "JIRA_INGEST_API_TOKEN")
|
|
25
|
+
async def test_jira_source(temp_dir):
|
|
26
|
+
# Retrieve environment variables
|
|
27
|
+
jira_url = os.environ.get(
|
|
28
|
+
"JIRA_INGEST_URL", "https://unstructured-jira-connector-test.atlassian.net"
|
|
29
|
+
)
|
|
30
|
+
user_email = os.environ["JIRA_INGEST_USER_EMAIL"]
|
|
31
|
+
api_token = os.environ["JIRA_INGEST_API_TOKEN"]
|
|
32
|
+
projects = ["JCTP1"]
|
|
33
|
+
boards = ["3"]
|
|
34
|
+
issues = ["JCTP2-1", "JCTP2-2", "JCTP2-3"]
|
|
35
|
+
|
|
36
|
+
# Create connection and indexer configurations
|
|
37
|
+
access_config = JiraAccessConfig(password=api_token)
|
|
38
|
+
connection_config = JiraConnectionConfig(
|
|
39
|
+
url=jira_url,
|
|
40
|
+
username=user_email,
|
|
41
|
+
access_config=access_config,
|
|
42
|
+
)
|
|
43
|
+
index_config = JiraIndexerConfig(projects=projects, boards=boards, issues=issues)
|
|
44
|
+
|
|
45
|
+
download_config = JiraDownloaderConfig(download_dir=temp_dir)
|
|
46
|
+
|
|
47
|
+
# Instantiate indexer and downloader
|
|
48
|
+
indexer = JiraIndexer(
|
|
49
|
+
connection_config=connection_config,
|
|
50
|
+
index_config=index_config,
|
|
51
|
+
)
|
|
52
|
+
downloader = JiraDownloader(
|
|
53
|
+
connection_config=connection_config,
|
|
54
|
+
download_config=download_config,
|
|
55
|
+
)
|
|
56
|
+
|
|
57
|
+
# Run the source connector validation
|
|
58
|
+
await source_connector_validation(
|
|
59
|
+
indexer=indexer,
|
|
60
|
+
downloader=downloader,
|
|
61
|
+
configs=SourceValidationConfigs(
|
|
62
|
+
test_id="jira",
|
|
63
|
+
expected_num_files=8,
|
|
64
|
+
validate_file_data=True,
|
|
65
|
+
validate_downloaded_files=True,
|
|
66
|
+
),
|
|
67
|
+
)
|
test/unit/test_utils.py
CHANGED
|
@@ -10,6 +10,7 @@ from unstructured_ingest.cli.utils import extract_config
|
|
|
10
10
|
from unstructured_ingest.interfaces import BaseConfig
|
|
11
11
|
from unstructured_ingest.utils.string_and_date_utils import (
|
|
12
12
|
ensure_isoformat_datetime,
|
|
13
|
+
fix_unescaped_unicode,
|
|
13
14
|
json_to_dict,
|
|
14
15
|
truncate_string_bytes,
|
|
15
16
|
)
|
|
@@ -182,3 +183,29 @@ def test_truncate_string_bytes_return_untouched_string():
|
|
|
182
183
|
result = truncate_string_bytes(test_string, max_bytes)
|
|
183
184
|
assert result == "abcdef"
|
|
184
185
|
assert len(result.encode("utf-8")) <= max_bytes
|
|
186
|
+
|
|
187
|
+
|
|
188
|
+
def test_fix_unescaped_unicode_valid():
|
|
189
|
+
text = "This is a test with unescaped unicode: \\u0041"
|
|
190
|
+
expected = "This is a test with unescaped unicode: \u0041"
|
|
191
|
+
assert fix_unescaped_unicode(text) == expected
|
|
192
|
+
|
|
193
|
+
|
|
194
|
+
def test_fix_unescaped_unicode_no_unescaped_chars():
|
|
195
|
+
text = "This is a test with no unescaped unicode: \u0041"
|
|
196
|
+
expected = "This is a test with no unescaped unicode: \u0041"
|
|
197
|
+
assert fix_unescaped_unicode(text) == expected
|
|
198
|
+
|
|
199
|
+
|
|
200
|
+
def test_fix_unescaped_unicode_invalid_unicode():
|
|
201
|
+
text = "This is a test with invalid unescaped unicode: \\uZZZZ"
|
|
202
|
+
expected = "This is a test with invalid unescaped unicode: \\uZZZZ"
|
|
203
|
+
assert fix_unescaped_unicode(text) == expected
|
|
204
|
+
|
|
205
|
+
|
|
206
|
+
def test_fix_unescaped_unicode_encoding_error(caplog: pytest.LogCaptureFixture):
|
|
207
|
+
text = "This is a test with unescaped unicode: \\uD83D"
|
|
208
|
+
fix_unescaped_unicode(text)
|
|
209
|
+
with caplog.at_level("WARNING"):
|
|
210
|
+
fix_unescaped_unicode(text)
|
|
211
|
+
assert "Failed to fix unescaped Unicode sequences" in caplog.text
|
|
test/unit/v2/connectors/test_jira.py
ADDED
@@ -0,0 +1,401 @@
|
|
|
1
|
+
from unittest.mock import MagicMock
|
|
2
|
+
|
|
3
|
+
import pytest
|
|
4
|
+
from pydantic import ValidationError
|
|
5
|
+
from pytest_mock import MockerFixture
|
|
6
|
+
|
|
7
|
+
from unstructured_ingest.v2.processes.connectors.jira import (
|
|
8
|
+
FieldGetter,
|
|
9
|
+
JiraAccessConfig,
|
|
10
|
+
JiraConnectionConfig,
|
|
11
|
+
JiraIndexer,
|
|
12
|
+
JiraIndexerConfig,
|
|
13
|
+
JiraIssueMetadata,
|
|
14
|
+
issues_fetcher_wrapper,
|
|
15
|
+
nested_object_to_field_getter,
|
|
16
|
+
)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
@pytest.fixture
|
|
20
|
+
def jira_connection_config():
|
|
21
|
+
access_config = JiraAccessConfig(password="password")
|
|
22
|
+
return JiraConnectionConfig(
|
|
23
|
+
url="http://localhost:1234",
|
|
24
|
+
username="test@example.com",
|
|
25
|
+
access_config=access_config,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
@pytest.fixture
|
|
30
|
+
def jira_indexer(jira_connection_config: JiraConnectionConfig):
|
|
31
|
+
indexer_config = JiraIndexerConfig(projects=["TEST1"], boards=["2"], issues=["TEST2-1"])
|
|
32
|
+
return JiraIndexer(connection_config=jira_connection_config, index_config=indexer_config)
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@pytest.fixture
|
|
36
|
+
def mock_jira(mocker: MockerFixture):
|
|
37
|
+
mock_client = mocker.patch.object(JiraConnectionConfig, "get_client", autospec=True)
|
|
38
|
+
mock_jira = mocker.MagicMock()
|
|
39
|
+
mock_client.return_value.__enter__.return_value = mock_jira
|
|
40
|
+
return mock_jira
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
def test_jira_indexer_precheck_success(
|
|
44
|
+
caplog: pytest.LogCaptureFixture,
|
|
45
|
+
mocker: MockerFixture,
|
|
46
|
+
jira_indexer: JiraIndexer,
|
|
47
|
+
mock_jira: MagicMock,
|
|
48
|
+
):
|
|
49
|
+
get_permissions = mocker.MagicMock()
|
|
50
|
+
get_permissions.return_value = {"permissions": {"BROWSE_PROJECTS": {"havePermission": True}}}
|
|
51
|
+
mock_jira.get_permissions = get_permissions
|
|
52
|
+
|
|
53
|
+
with caplog.at_level("INFO"):
|
|
54
|
+
jira_indexer.precheck()
|
|
55
|
+
assert "Connection to Jira successful." in caplog.text
|
|
56
|
+
|
|
57
|
+
get_permissions.assert_called_once()
|
|
58
|
+
|
|
59
|
+
|
|
60
|
+
def test_jira_indexer_precheck_no_permission(
|
|
61
|
+
mocker: MockerFixture,
|
|
62
|
+
jira_indexer: JiraIndexer,
|
|
63
|
+
mock_jira: MagicMock,
|
|
64
|
+
):
|
|
65
|
+
get_permissions = mocker.MagicMock()
|
|
66
|
+
get_permissions.return_value = {"permissions": {"BROWSE_PROJECTS": {"havePermission": False}}}
|
|
67
|
+
mock_jira.get_permissions = get_permissions
|
|
68
|
+
|
|
69
|
+
with pytest.raises(ValueError):
|
|
70
|
+
jira_indexer.precheck()
|
|
71
|
+
|
|
72
|
+
get_permissions.assert_called_once()
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
@pytest.mark.parametrize(
|
|
76
|
+
("project_issues_count", "expected_issues_count"), [(2, 2), ({"total": 2}, 2), (0, 0)]
|
|
77
|
+
)
|
|
78
|
+
def test_jira_indexer_get_issues_within_single_project(
|
|
79
|
+
jira_indexer: JiraIndexer,
|
|
80
|
+
mock_jira: MagicMock,
|
|
81
|
+
project_issues_count,
|
|
82
|
+
expected_issues_count,
|
|
83
|
+
):
|
|
84
|
+
mock_jira.get_project_issues_count.return_value = project_issues_count
|
|
85
|
+
mock_jira.get_all_project_issues.return_value = [
|
|
86
|
+
{"id": "1", "key": "TEST-1"},
|
|
87
|
+
{"id": "2", "key": "TEST-2"},
|
|
88
|
+
]
|
|
89
|
+
|
|
90
|
+
issues = jira_indexer._get_issues_within_single_project("TEST1")
|
|
91
|
+
assert len(issues) == expected_issues_count
|
|
92
|
+
|
|
93
|
+
if issues:
|
|
94
|
+
assert issues[0].id == "1"
|
|
95
|
+
assert issues[0].key == "TEST-1"
|
|
96
|
+
assert issues[1].id == "2"
|
|
97
|
+
assert issues[1].key == "TEST-2"
|
|
98
|
+
|
|
99
|
+
|
|
100
|
+
def test_jira_indexer_get_issues_within_single_project_error(
|
|
101
|
+
jira_indexer: JiraIndexer,
|
|
102
|
+
mock_jira: MagicMock,
|
|
103
|
+
):
|
|
104
|
+
mock_jira.get_project_issues_count.return_value = {}
|
|
105
|
+
|
|
106
|
+
with pytest.raises(KeyError):
|
|
107
|
+
jira_indexer._get_issues_within_single_project("TEST1")
|
|
108
|
+
|
|
109
|
+
|
|
110
|
+
def test_jira_indexer_get_issues_within_projects_with_projects(
|
|
111
|
+
jira_indexer: JiraIndexer,
|
|
112
|
+
mock_jira: MagicMock,
|
|
113
|
+
):
|
|
114
|
+
mock_jira.get_project_issues_count.return_value = 2
|
|
115
|
+
mock_jira.get_all_project_issues.return_value = [
|
|
116
|
+
{"id": "1", "key": "TEST-1"},
|
|
117
|
+
{"id": "2", "key": "TEST-2"},
|
|
118
|
+
]
|
|
119
|
+
|
|
120
|
+
issues = jira_indexer._get_issues_within_projects()
|
|
121
|
+
assert len(issues) == 2
|
|
122
|
+
assert issues[0].id == "1"
|
|
123
|
+
assert issues[0].key == "TEST-1"
|
|
124
|
+
assert issues[1].id == "2"
|
|
125
|
+
assert issues[1].key == "TEST-2"
|
|
126
|
+
|
|
127
|
+
|
|
128
|
+
def test_jira_indexer_get_issues_within_projects_no_projects_with_boards_or_issues(
|
|
129
|
+
mocker: MockerFixture,
|
|
130
|
+
jira_indexer: JiraIndexer,
|
|
131
|
+
):
|
|
132
|
+
jira_indexer.index_config.projects = None
|
|
133
|
+
jira_indexer.index_config.boards = ["2"]
|
|
134
|
+
mocker.patch.object(JiraConnectionConfig, "get_client", autospec=True)
|
|
135
|
+
|
|
136
|
+
issues = jira_indexer._get_issues_within_projects()
|
|
137
|
+
assert issues == []
|
|
138
|
+
|
|
139
|
+
|
|
140
|
+
def test_jira_indexer_get_issues_within_projects_no_projects_no_boards_no_issues(
|
|
141
|
+
jira_indexer: JiraIndexer,
|
|
142
|
+
mock_jira: MagicMock,
|
|
143
|
+
):
|
|
144
|
+
jira_indexer.index_config.projects = None
|
|
145
|
+
jira_indexer.index_config.boards = None
|
|
146
|
+
jira_indexer.index_config.issues = None
|
|
147
|
+
mock_jira.projects.return_value = [{"key": "TEST1"}, {"key": "TEST2"}]
|
|
148
|
+
mock_jira.get_project_issues_count.return_value = 2
|
|
149
|
+
mock_jira.get_all_project_issues.return_value = [
|
|
150
|
+
{"id": "1", "key": "TEST-1"},
|
|
151
|
+
{"id": "2", "key": "TEST-2"},
|
|
152
|
+
]
|
|
153
|
+
|
|
154
|
+
issues = jira_indexer._get_issues_within_projects()
|
|
155
|
+
assert len(issues) == 4
|
|
156
|
+
assert issues[0].id == "1"
|
|
157
|
+
assert issues[0].key == "TEST-1"
|
|
158
|
+
assert issues[1].id == "2"
|
|
159
|
+
assert issues[1].key == "TEST-2"
|
|
160
|
+
assert issues[2].id == "1"
|
|
161
|
+
assert issues[2].key == "TEST-1"
|
|
162
|
+
assert issues[3].id == "2"
|
|
163
|
+
assert issues[3].key == "TEST-2"
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def test_jira_indexer_get_issues_within_boards(
|
|
167
|
+
jira_indexer: JiraIndexer,
|
|
168
|
+
mock_jira: MagicMock,
|
|
169
|
+
):
|
|
170
|
+
mock_jira.get_issues_for_board.return_value = [
|
|
171
|
+
{"id": "1", "key": "TEST-1"},
|
|
172
|
+
{"id": "2", "key": "TEST-2"},
|
|
173
|
+
]
|
|
174
|
+
|
|
175
|
+
issues = jira_indexer._get_issues_within_boards()
|
|
176
|
+
assert len(issues) == 2
|
|
177
|
+
assert issues[0].id == "1"
|
|
178
|
+
assert issues[0].key == "TEST-1"
|
|
179
|
+
assert issues[1].id == "2"
|
|
180
|
+
assert issues[1].key == "TEST-2"
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
def test_jira_indexer_get_issues_within_single_board(
|
|
184
|
+
jira_indexer: JiraIndexer,
|
|
185
|
+
mock_jira: MagicMock,
|
|
186
|
+
):
|
|
187
|
+
mock_jira.get_issues_for_board.return_value = [
|
|
188
|
+
{"id": "1", "key": "TEST-1"},
|
|
189
|
+
{"id": "2", "key": "TEST-2"},
|
|
190
|
+
]
|
|
191
|
+
|
|
192
|
+
issues = jira_indexer._get_issues_within_single_board("1")
|
|
193
|
+
assert len(issues) == 2
|
|
194
|
+
assert issues[0].id == "1"
|
|
195
|
+
assert issues[0].key == "TEST-1"
|
|
196
|
+
assert issues[0].board_id == "1"
|
|
197
|
+
assert issues[1].id == "2"
|
|
198
|
+
assert issues[1].key == "TEST-2"
|
|
199
|
+
assert issues[1].board_id == "1"
|
|
200
|
+
|
|
201
|
+
|
|
202
|
+
def test_jira_indexer_get_issues_within_single_board_no_issues(
|
|
203
|
+
jira_indexer: JiraIndexer,
|
|
204
|
+
mock_jira: MagicMock,
|
|
205
|
+
):
|
|
206
|
+
mock_jira.get_issues_for_board.return_value = []
|
|
207
|
+
|
|
208
|
+
issues = jira_indexer._get_issues_within_single_board("1")
|
|
209
|
+
assert len(issues) == 0
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def test_jira_indexer_get_issues(
|
|
213
|
+
jira_indexer: JiraIndexer,
|
|
214
|
+
mock_jira: MagicMock,
|
|
215
|
+
):
|
|
216
|
+
jira_indexer.index_config.issues = ["TEST2-1", "TEST2-2"]
|
|
217
|
+
mock_jira.get_issue.return_value = {
|
|
218
|
+
"id": "ISSUE_ID",
|
|
219
|
+
"key": "ISSUE_KEY",
|
|
220
|
+
}
|
|
221
|
+
|
|
222
|
+
issues = jira_indexer._get_issues()
|
|
223
|
+
assert len(issues) == 2
|
|
224
|
+
assert issues[0].id == "ISSUE_ID"
|
|
225
|
+
assert issues[0].key == "ISSUE_KEY"
|
|
226
|
+
|
|
227
|
+
|
|
228
|
+
def test_jira_indexer_get_issues_unique_issues(mocker: MockerFixture, jira_indexer: JiraIndexer):
|
|
229
|
+
mocker.patch.object(
|
|
230
|
+
JiraIndexer,
|
|
231
|
+
"_get_issues_within_boards",
|
|
232
|
+
return_value=[
|
|
233
|
+
JiraIssueMetadata(id="1", key="TEST-1", board_id="1"),
|
|
234
|
+
JiraIssueMetadata(id="2", key="TEST-2", board_id="1"),
|
|
235
|
+
],
|
|
236
|
+
)
|
|
237
|
+
mocker.patch.object(
|
|
238
|
+
JiraIndexer,
|
|
239
|
+
"_get_issues_within_projects",
|
|
240
|
+
return_value=[
|
|
241
|
+
JiraIssueMetadata(id="1", key="TEST-1"),
|
|
242
|
+
JiraIssueMetadata(id="3", key="TEST-3"),
|
|
243
|
+
],
|
|
244
|
+
)
|
|
245
|
+
mocker.patch.object(
|
|
246
|
+
JiraIndexer,
|
|
247
|
+
"_get_issues",
|
|
248
|
+
return_value=[
|
|
249
|
+
JiraIssueMetadata(id="4", key="TEST-4"),
|
|
250
|
+
JiraIssueMetadata(id="2", key="TEST-2"),
|
|
251
|
+
],
|
|
252
|
+
)
|
|
253
|
+
|
|
254
|
+
issues = jira_indexer.get_issues()
|
|
255
|
+
assert len(issues) == 4
|
|
256
|
+
assert issues[0].id == "1"
|
|
257
|
+
assert issues[0].key == "TEST-1"
|
|
258
|
+
assert issues[0].board_id == "1"
|
|
259
|
+
assert issues[1].id == "2"
|
|
260
|
+
assert issues[1].key == "TEST-2"
|
|
261
|
+
assert issues[1].board_id == "1"
|
|
262
|
+
assert issues[2].id == "3"
|
|
263
|
+
assert issues[2].key == "TEST-3"
|
|
264
|
+
assert issues[3].id == "4"
|
|
265
|
+
assert issues[3].key == "TEST-4"
|
|
266
|
+
|
|
267
|
+
|
|
268
|
+
def test_jira_indexer_get_issues_no_duplicates(mocker: MockerFixture, jira_indexer: JiraIndexer):
|
|
269
|
+
mocker.patch.object(
|
|
270
|
+
JiraIndexer,
|
|
271
|
+
"_get_issues_within_boards",
|
|
272
|
+
return_value=[
|
|
273
|
+
JiraIssueMetadata(id="1", key="TEST-1", board_id="1"),
|
|
274
|
+
],
|
|
275
|
+
)
|
|
276
|
+
mocker.patch.object(
|
|
277
|
+
JiraIndexer,
|
|
278
|
+
"_get_issues_within_projects",
|
|
279
|
+
return_value=[
|
|
280
|
+
JiraIssueMetadata(id="2", key="TEST-2"),
|
|
281
|
+
],
|
|
282
|
+
)
|
|
283
|
+
mocker.patch.object(
|
|
284
|
+
JiraIndexer,
|
|
285
|
+
"_get_issues",
|
|
286
|
+
return_value=[
|
|
287
|
+
JiraIssueMetadata(id="3", key="TEST-3"),
|
|
288
|
+
],
|
|
289
|
+
)
|
|
290
|
+
|
|
291
|
+
issues = jira_indexer.get_issues()
|
|
292
|
+
assert len(issues) == 3
|
|
293
|
+
assert issues[0].id == "1"
|
|
294
|
+
assert issues[0].key == "TEST-1"
|
|
295
|
+
assert issues[0].board_id == "1"
|
|
296
|
+
assert issues[1].id == "2"
|
|
297
|
+
assert issues[1].key == "TEST-2"
|
|
298
|
+
assert issues[2].id == "3"
|
|
299
|
+
assert issues[2].key == "TEST-3"
|
|
300
|
+
|
|
301
|
+
|
|
302
|
+
def test_jira_indexer_get_issues_empty(mocker: MockerFixture, jira_indexer: JiraIndexer):
|
|
303
|
+
mocker.patch.object(JiraIndexer, "_get_issues_within_boards", return_value=[])
|
|
304
|
+
mocker.patch.object(JiraIndexer, "_get_issues_within_projects", return_value=[])
|
|
305
|
+
mocker.patch.object(JiraIndexer, "_get_issues", return_value=[])
|
|
306
|
+
|
|
307
|
+
issues = jira_indexer.get_issues()
|
|
308
|
+
assert len(issues) == 0
|
|
309
|
+
|
|
310
|
+
|
|
311
|
+
def test_connection_config_multiple_auth():
|
|
312
|
+
with pytest.raises(ValidationError):
|
|
313
|
+
JiraConnectionConfig(
|
|
314
|
+
access_config=JiraAccessConfig(
|
|
315
|
+
password="api_token",
|
|
316
|
+
token="access_token",
|
|
317
|
+
),
|
|
318
|
+
username="user_email",
|
|
319
|
+
url="url",
|
|
320
|
+
)
|
|
321
|
+
|
|
322
|
+
|
|
323
|
+
def test_connection_config_no_auth():
|
|
324
|
+
with pytest.raises(ValidationError):
|
|
325
|
+
JiraConnectionConfig(access_config=JiraAccessConfig(), url="url")
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
def test_connection_config_basic_auth():
|
|
329
|
+
JiraConnectionConfig(
|
|
330
|
+
access_config=JiraAccessConfig(password="api_token"),
|
|
331
|
+
url="url",
|
|
332
|
+
username="user_email",
|
|
333
|
+
)
|
|
334
|
+
|
|
335
|
+
|
|
336
|
+
def test_connection_config_pat_auth():
|
|
337
|
+
JiraConnectionConfig(
|
|
338
|
+
access_config=JiraAccessConfig(token="access_token"),
|
|
339
|
+
url="url",
|
|
340
|
+
)
|
|
341
|
+
|
|
342
|
+
|
|
343
|
+
def test_jira_issue_metadata_object():
|
|
344
|
+
expected = {"id": "10000", "key": "TEST-1", "board_id": "1", "project_id": "TEST"}
|
|
345
|
+
metadata = JiraIssueMetadata(id="10000", key="TEST-1", board_id="1")
|
|
346
|
+
assert expected == metadata.to_dict()
|
|
347
|
+
|
|
348
|
+
|
|
349
|
+
def test_nested_object_to_field_getter():
|
|
350
|
+
obj = {"a": 1, "b": {"c": 2}}
|
|
351
|
+
fg = nested_object_to_field_getter(obj)
|
|
352
|
+
assert isinstance(fg, FieldGetter)
|
|
353
|
+
assert fg["a"] == 1
|
|
354
|
+
assert isinstance(fg["b"], FieldGetter)
|
|
355
|
+
assert fg["b"]["c"] == 2
|
|
356
|
+
assert isinstance(fg["b"]["d"], FieldGetter)
|
|
357
|
+
assert fg["b"]["d"]["e"] == {}
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def test_issues_fetcher_wrapper():
|
|
361
|
+
test_issues_to_fetch = 250
|
|
362
|
+
test_issues = [{"id": i} for i in range(0, test_issues_to_fetch)]
|
|
363
|
+
|
|
364
|
+
def mock_func(limit, start):
|
|
365
|
+
return {"results": test_issues[start : start + limit]}
|
|
366
|
+
|
|
367
|
+
wrapped_func = issues_fetcher_wrapper(mock_func, number_of_issues_to_fetch=test_issues_to_fetch)
|
|
368
|
+
results = wrapped_func()
|
|
369
|
+
assert len(results) == 250
|
|
370
|
+
assert results[0]["id"] == 0
|
|
371
|
+
assert results[-1]["id"] == 249
|
|
372
|
+
|
|
373
|
+
test_issues_to_fetch = 150
|
|
374
|
+
test_issues = [{"id": i} for i in range(0, test_issues_to_fetch)]
|
|
375
|
+
|
|
376
|
+
def mock_func_list(limit, start):
|
|
377
|
+
return test_issues[start : start + limit]
|
|
378
|
+
|
|
379
|
+
wrapped_func_list = issues_fetcher_wrapper(
|
|
380
|
+
mock_func_list, number_of_issues_to_fetch=test_issues_to_fetch
|
|
381
|
+
)
|
|
382
|
+
results_list = wrapped_func_list()
|
|
383
|
+
assert len(results_list) == 150
|
|
384
|
+
assert results_list[0]["id"] == 0
|
|
385
|
+
assert results_list[-1]["id"] == 149
|
|
386
|
+
|
|
387
|
+
def mock_func_invalid(limit, start):
|
|
388
|
+
return "invalid"
|
|
389
|
+
|
|
390
|
+
wrapped_func_invalid = issues_fetcher_wrapper(mock_func_invalid, number_of_issues_to_fetch=50)
|
|
391
|
+
with pytest.raises(TypeError):
|
|
392
|
+
wrapped_func_invalid()
|
|
393
|
+
|
|
394
|
+
def mock_func_key_error(limit, start):
|
|
395
|
+
return {"wrong_key": []}
|
|
396
|
+
|
|
397
|
+
wrapped_func_key_error = issues_fetcher_wrapper(
|
|
398
|
+
mock_func_key_error, number_of_issues_to_fetch=50
|
|
399
|
+
)
|
|
400
|
+
with pytest.raises(KeyError):
|
|
401
|
+
wrapped_func_key_error()
|
|
unstructured_ingest/__version__.py
CHANGED
@@ -1 +1 @@
|
|
|
1
|
-
__version__ = "0.5.9" # pragma: no cover
|
|
1
|
+
__version__ = "0.5.10" # pragma: no cover
|
|
unstructured_ingest/embed/openai.py
CHANGED
@@ -1,5 +1,5 @@
|
|
|
1
1
|
from dataclasses import dataclass
|
|
2
|
-
from typing import TYPE_CHECKING
|
|
2
|
+
from typing import TYPE_CHECKING, Optional
|
|
3
3
|
|
|
4
4
|
from pydantic import Field, SecretStr
|
|
5
5
|
|
|
@@ -26,6 +26,7 @@ if TYPE_CHECKING:
|
|
|
26
26
|
class OpenAIEmbeddingConfig(EmbeddingConfig):
|
|
27
27
|
api_key: SecretStr
|
|
28
28
|
embedder_model_name: str = Field(default="text-embedding-ada-002", alias="model_name")
|
|
29
|
+
base_url: Optional[str] = None
|
|
29
30
|
|
|
30
31
|
def wrap_error(self, e: Exception) -> Exception:
|
|
31
32
|
if is_internal_error(e=e):
|
|
@@ -57,13 +58,13 @@ class OpenAIEmbeddingConfig(EmbeddingConfig):
|
|
|
57
58
|
def get_client(self) -> "OpenAI":
|
|
58
59
|
from openai import OpenAI
|
|
59
60
|
|
|
60
|
-
return OpenAI(api_key=self.api_key.get_secret_value())
|
|
61
|
+
return OpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
|
|
61
62
|
|
|
62
63
|
@requires_dependencies(["openai"], extras="openai")
|
|
63
64
|
def get_async_client(self) -> "AsyncOpenAI":
|
|
64
65
|
from openai import AsyncOpenAI
|
|
65
66
|
|
|
66
|
-
return AsyncOpenAI(api_key=self.api_key.get_secret_value())
|
|
67
|
+
return AsyncOpenAI(api_key=self.api_key.get_secret_value(), base_url=self.base_url)
|
|
67
68
|
|
|
68
69
|
|
|
69
70
|
@dataclass
|