udata 10.8.2.dev36743__py2.py3-none-any.whl → 10.8.2.dev36842__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/harvest/backends/ckan/__init__.py +3 -0
- udata/harvest/backends/ckan/harvesters.py +274 -0
- udata/harvest/backends/ckan/models.py +10 -0
- udata/harvest/backends/ckan/schemas/__init__.py +0 -0
- udata/harvest/backends/ckan/schemas/ckan.py +86 -0
- udata/harvest/backends/ckan/schemas/dkan.py +98 -0
- udata/harvest/tests/ckan/conftest.py +67 -0
- udata/harvest/tests/ckan/data/dkan-french-w-license.json +226 -0
- udata/harvest/tests/ckan/test_ckan_backend.py +697 -0
- udata/harvest/tests/ckan/test_ckan_backend_errors.py +140 -0
- udata/harvest/tests/ckan/test_ckan_backend_filters.py +130 -0
- udata/harvest/tests/ckan/test_dkan_backend.py +68 -0
- udata/static/chunks/{11.b6f741fcc366abfad9c4.js → 11.0f04e49a40a0a381bcce.js} +3 -3
- udata/static/chunks/{11.b6f741fcc366abfad9c4.js.map → 11.0f04e49a40a0a381bcce.js.map} +1 -1
- udata/static/chunks/{13.2d06442dd9a05d9777b5.js → 13.d9c1735d14038b94c17e.js} +2 -2
- udata/static/chunks/{13.2d06442dd9a05d9777b5.js.map → 13.d9c1735d14038b94c17e.js.map} +1 -1
- udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js → 17.81c57c0dedf812e43013.js} +2 -2
- udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js.map → 17.81c57c0dedf812e43013.js.map} +1 -1
- udata/static/chunks/{19.f03a102365af4315f9db.js → 19.8da42e8359d72afc2618.js} +3 -3
- udata/static/chunks/{19.f03a102365af4315f9db.js.map → 19.8da42e8359d72afc2618.js.map} +1 -1
- udata/static/chunks/{8.778091d55cd8ea39af6b.js → 8.494b003a94383b142c18.js} +2 -2
- udata/static/chunks/{8.778091d55cd8ea39af6b.js.map → 8.494b003a94383b142c18.js.map} +1 -1
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +72 -65
- udata/translations/de/LC_MESSAGES/udata.mo +0 -0
- udata/translations/de/LC_MESSAGES/udata.po +72 -65
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +72 -65
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +72 -65
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +72 -65
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +72 -65
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +72 -65
- udata/translations/udata.pot +74 -70
- {udata-10.8.2.dev36743.dist-info → udata-10.8.2.dev36842.dist-info}/METADATA +3 -1
- {udata-10.8.2.dev36743.dist-info → udata-10.8.2.dev36842.dist-info}/RECORD +45 -33
- {udata-10.8.2.dev36743.dist-info → udata-10.8.2.dev36842.dist-info}/entry_points.txt +2 -0
- {udata-10.8.2.dev36743.dist-info → udata-10.8.2.dev36842.dist-info}/LICENSE +0 -0
- {udata-10.8.2.dev36743.dist-info → udata-10.8.2.dev36842.dist-info}/WHEEL +0 -0
- {udata-10.8.2.dev36743.dist-info → udata-10.8.2.dev36842.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,140 @@
|
|
|
1
|
+
import pytest
|
|
2
|
+
|
|
3
|
+
from udata.harvest import actions
|
|
4
|
+
from udata.harvest.tests.factories import HarvestSourceFactory
|
|
5
|
+
|
|
6
|
+
# Marks applied to every test in this module: use the ``clean_db`` fixture
# and run with the ``PLUGINS=["ckan"]`` option.
pytestmark = [pytest.mark.usefixtures("clean_db"), pytest.mark.options(PLUGINS=["ckan"])]

CKAN_URL = "https://harvest.me/"
API_URL = CKAN_URL + "api/3/action/package_list"

# Both a client-error and a server-error status code are tested:
# the CKAN API itself always returns 200, but other failures
# may still happen outside the API.
STATUS_CODE = (400, 500)
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
@pytest.mark.parametrize("code", STATUS_CODE)
def test_html_error(rmock, code):
    """An HTML error response yields a single job error that does not embed the HTML."""
    # This is what a wrong source URL produces: HTML comes back instead of JSON.
    page = "<html><body>Error</body></html>"
    source = HarvestSourceFactory(backend="ckan", url=CKAN_URL)
    rmock.get(
        API_URL,
        text=page,
        status_code=code,
        headers={"Content-Type": "text/html"},
    )

    actions.run(source.slug)
    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert len(job.errors) == 1
    # The HTML body is detected and kept out of the stored message.
    assert page not in job.errors[0].message
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
@pytest.mark.parametrize("code", STATUS_CODE)
def test_plain_text_error(rmock, code):
    """A plain-text body with an error status is reported as the HTTP error message."""
    source = HarvestSourceFactory(backend="ckan", url=CKAN_URL)
    rmock.get(
        API_URL,
        text='"Some error"',
        status_code=code,
        headers={"Content-Type": "text/plain"},
    )

    actions.run(source.slug)
    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert len(job.errors) == 1

    # With an error status the stored message is the HTTP error line,
    # not the text body returned by the server.
    reason = "Client Error" if code != 500 else "Server Error"
    expected = f"{code} {reason}: None for url: https://harvest.me/api/3/action/package_list"
    assert job.errors[0].message == expected
|
|
62
|
+
|
|
63
|
+
|
|
64
|
+
def test_200_plain_text_error(rmock):
    """A quoted plain-text body served with status 200 becomes the job error message."""
    source = HarvestSourceFactory(backend="ckan", url=CKAN_URL)
    rmock.get(
        API_URL,
        text='"Some error"',
        status_code=200,
        headers={"Content-Type": "text/plain"},
    )

    actions.run(source.slug)
    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert len(job.errors) == 1
    # The surrounding double quotes of the raw body are stripped.
    assert job.errors[0].message == "Some error"
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def test_standard_api_json_error(rmock):
    """A JSON failure whose ``error`` is a plain string is reported verbatim."""
    payload = {"success": False, "error": "an error"}
    source = HarvestSourceFactory(backend="ckan", url=CKAN_URL)
    rmock.get(
        API_URL,
        json=payload,
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert len(job.errors) == 1
    assert job.errors[0].message == "an error"
|
|
96
|
+
|
|
97
|
+
|
|
98
|
+
def test_standard_api_json_error_with_details(rmock):
    """A JSON failure whose ``error`` is an object is reported through its ``message``."""
    payload = {"success": False, "error": {"message": "an error"}}
    source = HarvestSourceFactory(backend="ckan", url=CKAN_URL)
    rmock.get(
        API_URL,
        json=payload,
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert len(job.errors) == 1
    assert job.errors[0].message == "an error"
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def test_standard_api_json_error_with_details_and_type(rmock):
    """When the error object carries ``__type``, it prefixes the reported message."""
    payload = {
        "success": False,
        "error": {"message": "Access denied", "__type": "Authorization Error"},
    }
    source = HarvestSourceFactory(backend="ckan", url=CKAN_URL)
    rmock.get(
        API_URL,
        json=payload,
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert len(job.errors) == 1
    assert job.errors[0].message == "Authorization Error: Access denied"
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
import urllib
import urllib.parse

import pytest

from udata.harvest import actions
from udata.harvest.tests.factories import HarvestSourceFactory
from udata.utils import faker
|
|
8
|
+
|
|
9
|
+
# Marks applied to every test in this module: use the ``clean_db`` fixture
# and run with the ``PLUGINS=["ckan"]`` option.
pytestmark = [pytest.mark.usefixtures("clean_db"), pytest.mark.options(PLUGINS=["ckan"])]
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def test_include_org_filter(ckan, rmock):
    """An ``organization`` filter is sent to package search as ``organization:<value>``."""
    source = HarvestSourceFactory(
        backend="ckan",
        url=ckan.BASE_URL,
        config={"filters": [{"key": "organization", "value": "organization_name"}]},
    )

    # An empty successful search result: only the outgoing request is checked.
    rmock.get(
        ckan.PACKAGE_SEARCH_URL,
        json={"success": True, "result": {"results": []}},
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    assert rmock.call_count == 1
    expected_query = urllib.parse.urlencode({"q": "organization:organization_name", "rows": 1000})
    assert rmock.last_request.url == f"{ckan.PACKAGE_SEARCH_URL}?{expected_query}"
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
def test_exclude_org_filter(ckan, rmock):
    """An ``exclude`` filter on ``organization`` is sent as ``-organization:<value>``."""
    source = HarvestSourceFactory(
        backend="ckan",
        url=ckan.BASE_URL,
        config={
            "filters": [{"key": "organization", "value": "organization_name", "type": "exclude"}]
        },
    )

    # An empty successful search result: only the outgoing request is checked.
    rmock.get(
        ckan.PACKAGE_SEARCH_URL,
        json={"success": True, "result": {"results": []}},
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    assert rmock.call_count == 1
    expected_query = urllib.parse.urlencode({"q": "-organization:organization_name", "rows": 1000})
    assert rmock.last_request.url == f"{ckan.PACKAGE_SEARCH_URL}?{expected_query}"
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def test_tag_filter(ckan, rmock):
    """A ``tags`` filter is sent to package search as ``tags:<value>``."""
    tag = faker.word()
    source = HarvestSourceFactory(
        backend="ckan",
        url=ckan.BASE_URL,
        config={"filters": [{"key": "tags", "value": tag}]},
    )

    # An empty successful search result: only the outgoing request is checked.
    rmock.get(
        ckan.PACKAGE_SEARCH_URL,
        json={"success": True, "result": {"results": []}},
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    assert rmock.call_count == 1
    expected_query = urllib.parse.urlencode({"q": f"tags:{tag}", "rows": 1000})
    assert rmock.last_request.url == f"{ckan.PACKAGE_SEARCH_URL}?{expected_query}"
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
def test_exclude_tag_filter(ckan, rmock):
    """An ``exclude`` filter on ``tags`` is sent to package search as ``-tags:<value>``."""
    tag = faker.word()
    source = HarvestSourceFactory(
        backend="ckan",
        url=ckan.BASE_URL,
        config={"filters": [{"key": "tags", "value": tag, "type": "exclude"}]},
    )

    # An empty successful search result: only the outgoing request is checked.
    rmock.get(
        ckan.PACKAGE_SEARCH_URL,
        json={"success": True, "result": {"results": []}},
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    assert rmock.call_count == 1
    expected_query = urllib.parse.urlencode({"q": f"-tags:{tag}", "rows": 1000})
    assert rmock.last_request.url == f"{ckan.PACKAGE_SEARCH_URL}?{expected_query}"
|
|
104
|
+
|
|
105
|
+
|
|
106
|
+
def test_can_have_multiple_filters(ckan, rmock):
    """Several filters are combined into one ``q`` parameter joined with ``AND``."""
    source = HarvestSourceFactory(
        backend="ckan",
        url=ckan.BASE_URL,
        config={
            "filters": [
                {"key": "organization", "value": "organization_name"},
                {"key": "tags", "value": "tag-2", "type": "exclude"},
            ]
        },
    )

    # An empty successful search result: only the outgoing request is checked.
    rmock.get(
        ckan.PACKAGE_SEARCH_URL,
        json={"success": True, "result": {"results": []}},
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()

    assert rmock.call_count == 1
    expected_query = urllib.parse.urlencode(
        {"q": "organization:organization_name AND -tags:tag-2", "rows": 1000}
    )
    assert rmock.last_request.url == f"{ckan.PACKAGE_SEARCH_URL}?{expected_query}"
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
from datetime import datetime
|
|
4
|
+
|
|
5
|
+
import pytest
|
|
6
|
+
|
|
7
|
+
from udata.app import create_app
|
|
8
|
+
from udata.core.organization.factories import OrganizationFactory
|
|
9
|
+
from udata.harvest import actions
|
|
10
|
+
from udata.harvest.tests.factories import HarvestSourceFactory
|
|
11
|
+
from udata.models import Dataset
|
|
12
|
+
from udata.settings import Defaults, Testing
|
|
13
|
+
from udata.tests.plugin import drop_db
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
def data_path(filename):
    """Return the path of ``filename`` inside the ``data`` directory next to this module."""
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "data", filename)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
class DkanSettings(Testing):
    # Settings override used by the module-level ``app`` fixture:
    # the ``Testing`` settings with only the "dkan" plugin listed.
    PLUGINS = ["dkan"]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
@pytest.fixture(scope="module")
def app(request):
    """Provide one udata app, created with ``DkanSettings``, for the whole module."""
    application = create_app(Defaults, override=DkanSettings)

    def reset_db():
        with application.app_context():
            drop_db(application)

    reset_db()  # drop the database before handing the app to the tests
    yield application
    reset_db()  # and drop it again when the fixture is torn down
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
def test_dkan_french_w_license(app, rmock):
    """DKAN harvester should import the ``dkan-french-w-license.json`` payload.

    The remote ``package_list`` and ``package_show`` endpoints are mocked. The
    harvest job must end as "done" and create a dataset for the organization
    with the expected harvest dates and two resources, one of them ``xlsx``.
    """
    DKAN_URL = "https://harvest.me/"
    API_URL = "{}api/3/action/".format(DKAN_URL)
    PACKAGE_LIST_URL = "{}package_list".format(API_URL)
    PACKAGE_SHOW_URL = "{}package_show".format(API_URL)

    # Decode the fixture explicitly as UTF-8 instead of the locale default, so
    # the test does not depend on the platform encoding (the fixture presumably
    # contains non-ASCII French text).
    with open(data_path("dkan-french-w-license.json"), encoding="utf-8") as ifile:
        data = json.load(ifile)

    org = OrganizationFactory()
    source = HarvestSourceFactory(backend="dkan", url=DKAN_URL, organization=org)
    rmock.get(
        PACKAGE_LIST_URL,
        json={"success": True, "result": ["fake-name"]},
        status_code=200,
        headers={"Content-Type": "application/json"},
    )
    rmock.get(
        PACKAGE_SHOW_URL,
        json=data,
        status_code=200,
        headers={"Content-Type": "application/json"},
    )

    actions.run(source.slug)
    source.reload()
    assert source.get_last_job().status == "done"

    datasets = Dataset.objects.filter(organization=org)
    assert len(datasets) > 0

    dataset = datasets.get(harvest__remote_id="04be6288-696d-4331-850d-a144871a7e3a")
    assert dataset.harvest.created_at == datetime(2019, 12, 10, 0, 0)
    assert dataset.harvest.modified_at == datetime(2019, 9, 30, 0, 0)
    assert len(dataset.resources) == 2
    assert "xlsx" in [r.format for r in dataset.resources]
|