udata 10.5.1.dev36025__py2.py3-none-any.whl → 10.5.1.dev36043__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- udata/core/dataset/recommendations.py +208 -0
- udata/core/dataset/transport.py +46 -0
- udata/schemas/recommendations.json +41 -0
- udata/settings.py +8 -0
- udata/tasks.py +2 -0
- udata/tests/dataset/test_dataset_recommendations.py +207 -0
- udata/tests/dataset/test_transport_tasks.py +75 -0
- {udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/METADATA +3 -1
- {udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/RECORD +13 -8
- {udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/LICENSE +0 -0
- {udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/WHEEL +0 -0
- {udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/entry_points.txt +0 -0
- {udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/top_level.txt +0 -0
udata/core/dataset/recommendations.py
ADDED

@@ -0,0 +1,208 @@
+import json
+import logging
+from importlib.resources import files
+
+import jsonschema
+import mongoengine
+import requests
+from flask import current_app
+
+from udata.commands import error, success
+from udata.models import Dataset, Reuse
+from udata.tasks import job
+from udata.uris import validate
+
+log = logging.getLogger(__name__)
+
+
+def recommendations_clean():
+    nb_datasets = Dataset.objects.filter(
+        **{
+            "extras__recommendations__exists": True,
+        }
+    ).update(
+        **{
+            "unset__extras__recommendations": True,
+            "unset__extras__recommendations-reuses": True,
+            "unset__extras__recommendations:sources": True,
+        }
+    )
+    success(f"Removed recommendations from {nb_datasets} dataset(s)")
+
+
+schema_path = files("udata").joinpath("schemas", "recommendations.json")
+
+
+def get_recommendations_data(url):
+    response = requests.get(url, timeout=10)
+    response.raise_for_status()
+    data = response.json()
+
+    with schema_path.open() as f:
+        schema = json.load(f)
+    jsonschema.validate(instance=data, schema=schema)
+
+    return data
+
+
+def get_unique_recommendations(recos):
+    """
+    This function returns a list of unique recos, based on the `id` key.
+    The first unique element found is kept, following ones are ignored.
+    Thus you should order the list accordingly before applying this function.
+    """
+    ids_seen = set()
+    unique_recos = []
+    for reco in recos:
+        if reco["id"] not in ids_seen:
+            ids_seen.add(reco["id"])
+            unique_recos.append(reco)
+    return unique_recos
+
+
+def get_dataset(id_or_slug):
+    obj = Dataset.objects(slug=id_or_slug).first()
+    return obj or Dataset.objects.get(id=id_or_slug)
+
+
+def get_reuse(id_or_slug):
+    obj = Reuse.objects(slug=id_or_slug).first()
+    return obj or Reuse.objects.get(id=id_or_slug)
+
+
+def process_source(source, recommendations_data):
+    for dataset in recommendations_data:
+        process_dataset(source, dataset)
+
+
+def process_dataset(source, dataset):
+    try:
+        target_dataset = get_dataset(dataset["id"])
+    except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
+        error(f"Dataset {dataset['id']} not found")
+        return
+
+    log.info(f"Processing recommendations for dataset {dataset['id']}")
+    valid_recos_datasets = []
+    valid_recos_reuses = []
+    valid_recos_externals = []
+    for reco in dataset["recommendations"]:
+        # default type is `dataset` for retrocompat
+        reco_type = reco.get("type", "dataset")
+        if reco_type == "dataset":
+            try:
+                reco_dataset_obj = get_dataset(reco["id"])
+                if reco_dataset_obj.id == target_dataset.id:
+                    continue
+                valid_recos_datasets.append(
+                    {
+                        "id": str(reco_dataset_obj.id),
+                        "score": reco["score"],
+                        "source": source,
+                    }
+                )
+            except (Dataset.DoesNotExist, mongoengine.errors.ValidationError):
+                error(f"Recommended dataset {reco['id']} not found")
+                continue
+        elif reco_type == "reuse":
+            try:
+                reuse = get_reuse(reco["id"])
+                valid_recos_reuses.append(
+                    {
+                        "id": str(reuse.id),
+                        "score": reco["score"],
+                        "source": source,
+                    }
+                )
+            except (Reuse.DoesNotExist, mongoengine.errors.ValidationError):
+                error(f"Recommended reuse {reco['id']} not found")
+                continue
+        elif reco_type == "external":
+            try:
+                external = validate(reco["id"])
+                valid_recos_externals.append(
+                    {
+                        "id": external,
+                        "score": reco["score"],
+                        "source": source,
+                        "messages": reco["messages"],
+                    }
+                )
+            except ValueError:
+                error(f"Recommended external {reco['id']} is not a valid url")
+                continue
+        else:
+            error(f"Unknown recommendation type {reco_type}")
+            continue
+
+    if len(valid_recos_datasets) or len(valid_recos_reuses) or len(valid_recos_externals):
+        new_sources = set(target_dataset.extras.get("recommendations:sources", []))
+        new_sources.add(source)
+        target_dataset.extras["recommendations:sources"] = list(new_sources)
+
+    if len(valid_recos_datasets):
+        success(
+            f"Found {len(valid_recos_datasets)} new dataset recommendations for dataset {dataset['id']}"
+        )
+
+        merged_recommendations = valid_recos_datasets + target_dataset.extras.get(
+            "recommendations", []
+        )
+        unique_recommendations = get_unique_recommendations(merged_recommendations)
+        new_recommendations = sorted(unique_recommendations, key=lambda k: k["score"], reverse=True)
+
+        target_dataset.extras["recommendations"] = new_recommendations
+
+    if len(valid_recos_reuses):
+        success(
+            f"Found {len(valid_recos_reuses)} new reuse recommendations for dataset {dataset['id']}"
+        )
+
+        merged_recommendations = valid_recos_reuses + target_dataset.extras.get(
+            "recommendations-reuses", []
+        )
+        unique_recommendations = get_unique_recommendations(merged_recommendations)
+        new_recommendations = sorted(unique_recommendations, key=lambda k: k["score"], reverse=True)
+
+        target_dataset.extras["recommendations-reuses"] = new_recommendations
+
+    if len(valid_recos_externals):
+        success(
+            f"Found {len(valid_recos_externals)} new external recommendations for dataset {dataset['id']}"
+        )
+
+        merged_recommendations = valid_recos_externals + target_dataset.extras.get(
+            "recommendations-externals", []
+        )
+        unique_recommendations = get_unique_recommendations(merged_recommendations)
+        new_recommendations = sorted(unique_recommendations, key=lambda k: k["score"], reverse=True)
+
+        target_dataset.extras["recommendations-externals"] = new_recommendations
+
+    if len(valid_recos_datasets) or len(valid_recos_reuses) or len(valid_recos_externals):
+        target_dataset.save()
+    else:
+        error(f"No recommendations found for dataset {dataset['id']}")
+
+
+def recommendations_add(sources, should_clean):
+    if should_clean:
+        log.info("Cleaning up dataset recommendations")
+        recommendations_clean()
+
+    for source, url in sources.items():
+        log.info(f"Fetching dataset recommendations from {url}, source {source}")
+        process_source(source, get_recommendations_data(url))
+
+
+@job("recommendations-clean")
+def run_recommendations_clean(self):
+    recommendations_clean()
+
+
+@job("recommendations-add")
+def run_recommendations_add(self, should_clean=True):
+    should_clean = should_clean in [True, "true", "True"]
+    sources = current_app.config.get("RECOMMENDATIONS_SOURCES", {})
+
+    recommendations_add(sources, should_clean)
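End to end, `recommendations_add` drives the whole pipeline: fetch and validate each configured source, then merge the scored recommendations into dataset extras. A minimal sketch of triggering it by hand, assuming an initialized udata application and database; the source name and URL are illustrative, not part of the codebase:

from udata.core.dataset.recommendations import recommendations_add

# Hypothetical source registry: name -> URL serving JSON that validates
# against udata/schemas/recommendations.json.
sources = {"my-source": "https://recommendations.example.com/datasets.json"}

# should_clean=True first unsets previously stored recommendations extras.
recommendations_add(sources, should_clean=True)

# Each matched dataset then carries, depending on the recommendation types:
#   extras["recommendations"]            scored dataset recommendations
#   extras["recommendations-reuses"]     scored reuse recommendations
#   extras["recommendations-externals"]  scored external URLs (with messages)
#   extras["recommendations:sources"]    the list of contributing sources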
udata/core/dataset/transport.py
ADDED

@@ -0,0 +1,46 @@
+import requests
+from flask import current_app
+
+from udata.commands import error, success
+from udata.models import Dataset
+from udata.tasks import job
+
+
+def process_dataset(dataset):
+    target_dataset = Dataset.objects(id=dataset["datagouv_id"]).first()
+    if not target_dataset:
+        error(f"Dataset {dataset['id']} not found")
+        return
+    target_dataset.extras["transport:url"] = dataset["page_url"]
+    target_dataset.save()
+
+
+def clear_datasets():
+    nb_datasets = Dataset.objects.filter(
+        **{
+            "extras__transport:url__exists": True,
+        }
+    ).update(
+        **{
+            "unset__extras__transport:url": True,
+        }
+    )
+    success(f"Removed transport:url from {nb_datasets} dataset(s)")
+
+
+@job("map-transport-datasets")
+def map_transport_datasets(self):
+    source = current_app.config.get("TRANSPORT_DATASETS_URL", None)
+    if not source:
+        error("TRANSPORT_DATASETS_URL variable must be set.")
+        return
+
+    response = requests.get(source)
+    if response.status_code != 200:
+        error("Remote platform unreachable.")
+        return
+    results_list = response.json()
+    clear_datasets()
+    for dataset in results_list:
+        process_dataset(dataset)
+    success(f"Done. {len(results_list)} datasets mapped to transport")
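The payload shape expected from TRANSPORT_DATASETS_URL can be read off the test fixture further down: a JSON list of objects, of which only `datagouv_id` and `page_url` are consumed (`id` is only echoed in error messages). A sketch of what the job does per entry, using illustrative values:

from udata.core.dataset.transport import process_dataset

entry = {
    "datagouv_id": "61fd29da29ea95c7bc0e1211",  # must match an existing Dataset id
    "id": "61fd29da29ea95c7bc0e1211",
    "page_url": "https://transport.data.gouv.fr/datasets/example-gtfs",
}

# Resolves Dataset.objects(id=entry["datagouv_id"]) and, if found, stores
# the page URL under extras["transport:url"], then saves the dataset.
process_dataset(entry)

Note that `clear_datasets()` only runs once the HTTP fetch has succeeded, which is what the failure test below relies on: a 500 response leaves existing `transport:url` extras untouched.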
udata/schemas/recommendations.json
ADDED

@@ -0,0 +1,41 @@
+{
+    "$schema": "http://json-schema.org/draft-07/schema#",
+    "type": "array",
+    "uniqueItems": true,
+    "items": {
+        "type": "object",
+        "properties": {
+            "id": {
+                "type": "string"
+            },
+            "type": {
+                "type": "string"
+            },
+            "recommendations": {
+                "type": "array",
+                "uniqueItems": true,
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "id": {
+                            "type": "string"
+                        },
+                        "score": {
+                            "type": "integer",
+                            "minimum": 1,
+                            "maximum": 100
+                        }
+                    },
+                    "required": [
+                        "id",
+                        "score"
+                    ]
+                }
+            }
+        },
+        "required": [
+            "id",
+            "recommendations"
+        ]
+    }
+}
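For reference, a minimal payload accepted by this schema, validated the same way `get_recommendations_data` does; the IDs are placeholders:

import json
from importlib.resources import files

import jsonschema

instance = [
    {
        "id": "target-dataset-id-or-slug",
        "recommendations": [
            {"id": "recommended-dataset-id", "score": 75},
            # "type" is not constrained by the schema; the task code
            # interprets "dataset" (the default), "reuse" and "external".
            {"id": "recommended-reuse-slug", "score": 40, "type": "reuse"},
        ],
    }
]

schema = json.loads(files("udata").joinpath("schemas", "recommendations.json").read_text())
jsonschema.validate(instance=instance, schema=schema)  # raises ValidationError on bad payloads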
udata/settings.py
CHANGED

@@ -508,6 +508,10 @@ class Defaults(object):
     ARCHIVE_COMMENT_USER_ID = None
     ARCHIVE_COMMENT_TITLE = _("This dataset has been archived")

+    # Transport extras
+    ##################
+    TRANSPORT_DATASETS_URL = None
+
     # Schemas parameters
     ####################
     SCHEMA_CATALOG_URL = None

@@ -516,6 +520,10 @@ class Defaults(object):
         "https://guides.data.gouv.fr/publier-des-donnees/guide-data.gouv.fr/api/reference"
     )

+    # Dataset recommendations
+    #########################
+    RECOMMENDATIONS_SOURCES = {}
+
     # Read Only Mode
     ####################
     # This mode can be used to mitigate a spam attack for example.
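Both settings default to empty and are meant to be overridden in a deployment's configuration. An illustrative override, with example names and URLs rather than shipped values:

# In a udata settings file (illustrative values):
TRANSPORT_DATASETS_URL = "https://transport.example.com/api/datasets"
RECOMMENDATIONS_SOURCES = {
    # source name -> URL serving a payload valid against recommendations.json
    "my-source": "https://recommendations.example.com/datasets.json",
}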
udata/tasks.py
CHANGED

@@ -163,6 +163,8 @@ def init_app(app):
     import udata.core.tags.tasks  # noqa
     import udata.core.activity.tasks  # noqa
     import udata.core.dataset.tasks  # noqa
+    import udata.core.dataset.transport  # noqa
+    import udata.core.dataset.recommendations  # noqa
     import udata.core.spatial.tasks  # noqa
     import udata.core.reuse.tasks  # noqa
     import udata.core.user.tasks  # noqa
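Importing the two modules in `init_app` is what registers their `@job`-decorated functions with Celery. Being Celery tasks, they can also be enqueued programmatically; a sketch assuming a configured broker and worker (`delay` is standard Celery API, the rest mirrors the diff above):

from udata.core.dataset.recommendations import run_recommendations_add
from udata.core.dataset.transport import map_transport_datasets

# Enqueue the jobs; a Celery worker picks them up asynchronously.
run_recommendations_add.delay(should_clean=True)
map_transport_datasets.delay()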
udata/tests/dataset/test_dataset_recommendations.py
ADDED

@@ -0,0 +1,207 @@
+import jsonschema
+import pytest
+
+from udata.core.dataset.factories import DatasetFactory
+from udata.core.dataset.recommendations import recommendations_add, recommendations_clean
+from udata.core.reuse.factories import ReuseFactory
+
+MOCK_URL = "http://reco.net"
+
+
+@pytest.fixture
+def datasets():
+    return DatasetFactory.create_batch(3)
+
+
+@pytest.fixture
+def reuses():
+    return ReuseFactory.create_batch(2)
+
+
+@pytest.fixture
+def mock_invalid_response():
+    return [{"foo": "bar"}]
+
+
+@pytest.fixture
+def mock_response(datasets, reuses):
+    ds1, ds2, ds3 = datasets
+    r1, r2 = reuses
+    return [
+        {
+            # Invalid ID, but valid reco: should not crash the command
+            "id": "1",
+            "recommendations": [{"id": str(ds1.id), "score": 50}],
+        },
+        {
+            # valid ID and recos,
+            # should process two elements w/o crashing
+            # should reorder by score and handle reco by ID and slug
+            "id": str(ds2.id),
+            "recommendations": [
+                {"id": str(ds3.id), "score": 1},
+                {"id": str(ds1.slug), "score": 2},
+                {"id": "nope", "score": 50},
+                {
+                    "id": str(r1.slug),
+                    "score": 50,
+                    "type": "reuse",
+                },
+                {
+                    "id": str(r2.id),
+                    "score": 100,
+                    "type": "reuse",
+                },
+            ],
+        },
+        {
+            # Valid ID but recommended dataset does not exist
+            "id": str(ds3.id),
+            "recommendations": [
+                {"id": "nope", "score": 50},
+            ],
+        },
+    ]
+
+
+@pytest.mark.usefixtures("clean_db")
+class DatasetRecommendationsTest:
+    def test_clean(self):
+        ds1 = DatasetFactory(
+            extras={
+                "untouched": "yep",
+                "recommendations:sources": ["foo", "bar"],
+                "recommendations": [
+                    {"id": "id1", "source": "bar", "score": 50},
+                    {"id": "id2", "source": "foo", "score": 50},
+                ],
+            }
+        )
+        ds2 = DatasetFactory(
+            extras={
+                "wait": "for it",
+                "recommendations:sources": ["baz"],
+                "recommendations": [
+                    {"id": "id2", "source": "baz", "score": 50},
+                ],
+            }
+        )
+
+        recommendations_clean()
+
+        ds1.reload()
+        ds2.reload()
+
+        assert ds1.extras == {"untouched": "yep"}
+        assert ds2.extras == {"wait": "for it"}
+
+    def test_datasets_recommendations_invalid_data_in_config(self, mock_invalid_response, rmock):
+        rmock.get(MOCK_URL, json=mock_invalid_response)
+
+        with pytest.raises(jsonschema.exceptions.ValidationError):
+            recommendations_add({"fake_source": MOCK_URL}, should_clean=False)
+
+    def test_datasets_recommendations_from_config_empty_db(self, rmock, mock_response, datasets):
+        ds1, ds2, ds3 = datasets
+        rmock.get(MOCK_URL, json=mock_response)
+
+        recommendations_add({"fake_source": MOCK_URL}, should_clean=False)
+
+        # Correct recommendations have been filled
+        ds2.reload()
+        assert ds2.extras["recommendations:sources"] == ["fake_source"]
+        assert ds2.extras["recommendations"] == [
+            {"id": str(ds1.id), "source": "fake_source", "score": 2},
+            {"id": str(ds3.id), "source": "fake_source", "score": 1},
+        ]
+
+        # Invalid recommendations have not been filled
+        ds1.reload()
+        ds3.reload()
+        assert ds1.extras == {}
+        assert ds3.extras == {}
+
+    def test_datasets_recommendations_from_config(self, rmock, mock_response, datasets, reuses):
+        ds1, ds2, ds3 = datasets
+        r1, r2 = reuses
+        ds4 = DatasetFactory()
+        rmock.get(MOCK_URL, json=mock_response)
+        ds2.extras["recommendations:sources"] = ["existing"]
+        ds2.extras["recommendations"] = [
+            {"id": str(ds4.id), "source": "existing", "score": 50},
+        ]
+        ds2.save()
+
+        recommendations_add({"fake_source": MOCK_URL}, should_clean=False)
+
+        # Recommendations have been merged, new source has been added
+        ds2.reload()
+        assert set(ds2.extras["recommendations:sources"]) == set(["existing", "fake_source"])
+        assert ds2.extras["recommendations"] == [
+            {"id": str(ds4.id), "source": "existing", "score": 50},
+            {"id": str(ds1.id), "source": "fake_source", "score": 2},
+            {"id": str(ds3.id), "source": "fake_source", "score": 1},
+        ]
+        assert ds2.extras["recommendations-reuses"] == [
+            {"id": str(r2.id), "source": "fake_source", "score": 100},
+            {"id": str(r1.id), "source": "fake_source", "score": 50},
+        ]
+
+    def test_datasets_recommendations_from_config_clean(self, mock_response, rmock, datasets):
+        ds1, ds2, ds3 = datasets
+        rmock.get(MOCK_URL, json=mock_response)
+
+        ds1.extras["recommendations:sources"] = ["fake_source"]
+        ds1.extras["recommendations"] = [{"id": str(ds2.id), "source": "fake_source", "score": 100}]
+        ds1.save()
+
+        recommendations_add({"fake_source": MOCK_URL}, should_clean=True)
+
+        # Correct recommendations have been filled
+        ds2.reload()
+        assert ds2.extras["recommendations:sources"] == ["fake_source"]
+        assert ds2.extras["recommendations"] == [
+            {"id": str(ds1.id), "source": "fake_source", "score": 2},
+            {"id": str(ds3.id), "source": "fake_source", "score": 1},
+        ]
+
+        # Previous recommendations have been cleaned
+        ds1.reload()
+        assert ds1.extras == {}
+
+    def test_datasets_recommendations_ignore_self_recommendation(self, rmock, datasets):
+        ds1, _, _ = datasets
+        rmock.get(
+            MOCK_URL,
+            json=[{"id": str(ds1.id), "recommendations": [{"id": str(ds1.id), "score": 50}]}],
+        )
+
+        recommendations_add({"fake_source": MOCK_URL}, should_clean=True)
+
+        ds1.reload()
+        assert ds1.extras == {}
+
+    def test_datasets_recommendations_ignore_duplicate_recommendation(self, rmock, datasets):
+        ds1, ds2, ds3 = datasets
+        ds1.extras = {"recommendations": [{"id": str(ds2), "source": "fake_source", "score": 1}]}
+        rmock.get(
+            MOCK_URL,
+            json=[
+                {
+                    "id": str(ds1.id),
+                    "recommendations": [
+                        {"id": str(ds2.id), "score": 4},
+                        {"id": str(ds3.id), "score": 5},
+                    ],
+                }
+            ],
+        )
+
+        recommendations_add({"fake_source": MOCK_URL}, should_clean=True)
+
+        # The new recommendation score for ds2 will be kept instead of the old one
+        ds1.reload()
+        assert ds1.extras["recommendations"] == [
+            {"id": str(ds3.id), "source": "fake_source", "score": 5},
+            {"id": str(ds2.id), "source": "fake_source", "score": 4},
+        ]
udata/tests/dataset/test_transport_tasks.py
ADDED

@@ -0,0 +1,75 @@
+import pytest
+import requests_mock
+
+from udata.core.dataset.factories import DatasetFactory
+from udata.core.dataset.transport import clear_datasets, map_transport_datasets
+
+
+@pytest.fixture
+def mock_response():
+    return [
+        {
+            "datagouv_id": "61fd29da29ea95c7bc0e1211",
+            "id": "61fd29da29ea95c7bc0e1211",
+            "page_url": "https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-des-navettes-hivernales-de-lalpe-dhuez-gtfs-gtfs-rt",
+            "slug": "horaires-theoriques-et-temps-reel-des-navettes-hivernales-de-lalpe-dhuez-gtfs-gtfs-rt",
+            "title": "Navettes hivernales de l'Alpe d'Huez",
+        },
+        {
+            "datagouv_id": "5f23d4b3d39755210a04a99c",
+            "id": "5f23d4b3d39755210a04a99c",
+            "page_url": "https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-lr-11-lalouvesc-tournon-st-felicien-gtfs-gtfs-rt",
+            "slug": "horaires-theoriques-et-temps-reel-du-reseau-lr-11-lalouvesc-tournon-st-felicien-gtfs-gtfs-rt",
+            "title": "Réseau interurbain Lalouvesc / Tournon / St Felicien",
+        },
+    ]
+
+
+@pytest.mark.usefixtures("clean_db")
+class TransportTasksTest:
+    @pytest.mark.options(TRANSPORT_DATASETS_URL="http://local.test/api/datasets")
+    def test_map_transport_datasets(self, mock_response):
+        ds1 = DatasetFactory(id="61fd29da29ea95c7bc0e1211")
+        ds2 = DatasetFactory(id="5f23d4b3d39755210a04a99c")
+
+        with requests_mock.Mocker() as m:
+            m.get("http://local.test/api/datasets", json=mock_response)
+            map_transport_datasets()
+
+        ds1.reload()
+        ds2.reload()
+
+        assert (
+            ds1.extras["transport:url"]
+            == "https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-des-navettes-hivernales-de-lalpe-dhuez-gtfs-gtfs-rt"
+        )
+        assert (
+            ds2.extras["transport:url"]
+            == "https://transport.data.gouv.fr/datasets/horaires-theoriques-et-temps-reel-du-reseau-lr-11-lalouvesc-tournon-st-felicien-gtfs-gtfs-rt"
+        )
+
+        clear_datasets()
+
+        ds1.reload()
+        ds2.reload()
+
+        assert "transport:url" not in ds1.extras
+        assert "transport:url" not in ds2.extras
+
+    @pytest.mark.options(TRANSPORT_DATASETS_URL="http://local.test/api/datasets")
+    def test_map_transport_datasets_fail(self, mock_response):
+        """
+        We should not erase existing transport:url extras if the job fails
+        """
+        ds1 = DatasetFactory(id="61fd29da29ea95c7bc0e1211", extras={"transport:url": "dummy"})
+        ds2 = DatasetFactory(id="5f23d4b3d39755210a04a99c")
+
+        with requests_mock.Mocker() as m:
+            m.get("http://local.test/api/datasets", status_code=500)
+            map_transport_datasets()
+
+        ds1.reload()
+        ds2.reload()
+
+        assert ds1.extras["transport:url"] == "dummy"
+        assert "transport:url" not in ds2.extras
{udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/METADATA
CHANGED

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: udata
-Version: 10.5.1.dev36025
+Version: 10.5.1.dev36043
 Summary: Open data portal
 Home-page: https://github.com/opendatateam/udata
 Author: Opendata Team

@@ -147,6 +147,8 @@ It is collectively taken care of by members of the
 - Expose `dataset_id` for CommunityResource in /dataset/resource/id [#3258](https://github.com/opendatateam/udata/pull/3258)
 - Add a CI job to create a release on Sentry [#3266](https://github.com/opendatateam/udata/pull/3266)
 - Sort extensions in `/api/1/datasets/extensions/` response alphabetically [#3358](https://github.com/opendatateam/udata/pull/3358)
+- Migrate recommendations tasks, [udata-recommendations](https://github.com/opendatateam/udata-recommendations/) is no longer required [#3355](https://github.com/opendatateam/udata/pull/3355)
+- Migrate udata-transport tasks, [udata-transport](https://github.com/opendatateam/udata-transport/) is no longer required [#3355](https://github.com/opendatateam/udata/pull/3355)

 ## 10.5.0 (2025-07-02)

{udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/RECORD
CHANGED

@@ -13,10 +13,10 @@ udata/mail.py,sha256=FMGHcDAjHvk86iDUwBmVXpx3vbAb2c-j5C3BRnh9IYQ,2670
 udata/rdf.py,sha256=JmMxwq4fFBrBZQhJ6O9_nEeYUXspPzoZGTyGUD4Nyxs,18348
 udata/routing.py,sha256=E6sE1F74QyOoz5vcgEi-rNEhCegwLfOtBz5I9fWk-pM,7677
 udata/sentry.py,sha256=ekcxqUSqxfM98TtvCsPaOoX5i2l6PEcYt7kb4l3od-Q,3223
-udata/settings.py,sha256=
+udata/settings.py,sha256=dMe5iB5NI3fKfFr4YgO1K8OKj-7FQLcT5mSteXUE-5g,19426
 udata/sitemap.py,sha256=oRRWoPI7ZsFFnUAOqGT1YuXFFKHBe8EcRnUCNHD7xjM,979
 udata/tags.py,sha256=ydq4uokd6bzdeGVSpEXASVtGvDfO2LfQs9mptvvKJCM,631
-udata/tasks.py,sha256=
+udata/tasks.py,sha256=yTYBJG5bzEChX27p3MSqurSji84rg7w7OUvK4vuPRfY,5080
 udata/terms.md,sha256=nFx978tUQ3vTEv6POykXaZvcQ5e_gcvmO4ZgcfbSWXo,187
 udata/tracking.py,sha256=WOcqA1RlHN8EPFuEc2kNau54mec4-pvi-wUFrMXevzg,345
 udata/uris.py,sha256=sIhlzpwFO7ftOHYgTZmR7mCoty6a1n4KC4c0Qmx3lqo,3655

@@ -109,9 +109,11 @@ udata/core/dataset/models.py,sha256=_zsUDRl4xUt2Rp1QuIw2f85FxBNx_e2XgpiXVxVQ-tE,
 udata/core/dataset/permissions.py,sha256=zXQ6kU-Ni3Pl5tDtat-ZPupug9InsNeCN7xRLc2Vcrc,1097
 udata/core/dataset/preview.py,sha256=IwCqiNTjjXbtA_SSKF52pwnzKKEz0GyYM95QNn2Dkog,2561
 udata/core/dataset/rdf.py,sha256=Bc83-JdY_2yuSUGnELVlhIQd7j1zIEX6d6CCw5RrOL4,31728
+udata/core/dataset/recommendations.py,sha256=DlGSLU8D0nW6Ds1rjBav1WxC-0VW5yOCjkO5w-ltFcI,7171
 udata/core/dataset/search.py,sha256=E7LqHBnq3sMefvmLwTpiw-Ovem2a3NJswHesRjctboE,5627
 udata/core/dataset/signals.py,sha256=WN4sV-lJlNsRkhcnhoy0SYJvCoYmK_5QFYZd1u-h4gs,161
 udata/core/dataset/tasks.py,sha256=6FzeLzJRQxzq7sBLUE8H8ZGLByix2EDOzGAsA8FteX8,10019
+udata/core/dataset/transport.py,sha256=ihCXirY1dZjOfXKbf9HRCJTfIOc75rM1McwbeGjsW6A,1296
 udata/core/discussions/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 udata/core/discussions/actions.py,sha256=kjdBLDIeu0yWTSxQGgOpN1WoxUqbMygn4SiBk_S9T5I,1051
 udata/core/discussions/api.py,sha256=u9atdyPVGpyLin4IRF1ZhMp6gRmtJvV0Y8ksic_nbJo,12807

@@ -385,6 +387,7 @@ udata/mongo/url_field.py,sha256=UmUr9c5SxDFDpS5QsRTq2pKcCTOr1SoB4UITwNjtuaI,1345
 udata/mongo/uuid_fields.py,sha256=tuQ3zs_BnQHjaiKSIYv43jxvYtOvRLw9nP5CQ3fcMks,482
 udata/notifications/__init__.py,sha256=ZrSpV2zI9bZ0oz8tGsnA8hjDdGeU7YDdgvOLo70aohg,54
 udata/notifications/mattermost.py,sha256=v8VrDg0iEVsioSSEgukhCK0lz9Lxy8H16z_gTQry1ko,783
+udata/schemas/recommendations.json,sha256=VTQ3NbssGqYER2R9MKgjUfhVERox_PdR2Ccr1S-WAU8,778
 udata/search/__init__.py,sha256=09cBlIPoQePn20KL9AjfDwazFUmXTBiAOcUrcFtSA-o,3945
 udata/search/adapter.py,sha256=XgTUSLPEtJPsd1T2XKekOT4RYxIC2fKuwXGi_FuVnFw,2425
 udata/search/commands.py,sha256=_gKSBQGqZ06aE-6m-NqBH2xlIzezscNgpr9GyxCZLgA,5415

@@ -664,8 +667,10 @@ udata/tests/dataset/test_dataset_commands.py,sha256=zMPJG2wYwKBee2zI65kmboxf59Zq
 udata/tests/dataset/test_dataset_events.py,sha256=hlrpoOiBbnX_COUI9Pzdqlp45GZZDqu5piwupbnPiTI,3601
 udata/tests/dataset/test_dataset_model.py,sha256=av4RhOnT-52qs-WL1NCn6R2SMa_fzDcoV-06vjccsdw,33891
 udata/tests/dataset/test_dataset_rdf.py,sha256=7SorX0e0VD3hmj8C0qXA4Vb3Q3xl2qaE4ijRfeQ12PM,44537
+udata/tests/dataset/test_dataset_recommendations.py,sha256=K52HXGXi9DuUSiSRQWpqTrsFCkYbv7K3upNp0fFp5v8,7068
 udata/tests/dataset/test_dataset_tasks.py,sha256=n1W2Pg0ez02d66zQG3N93kh7dpR2yLMRDqUI6PnPaI0,3088
 udata/tests/dataset/test_resource_preview.py,sha256=fp9mSL7unhyM66GR0gwhgX3OGQ4TJt7G9xU-CjsL3HI,3908
+udata/tests/dataset/test_transport_tasks.py,sha256=BYr1WPV0Crirzb2jC_wAV4y_pzImRCAS4zYZ7qvCDeY,2964
 udata/tests/features/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 udata/tests/features/territories/__init__.py,sha256=gMD73RL-ymcWvGPDPM0aPxz7WAfd1VEDL8YHRI7HT0Q,956
 udata/tests/features/territories/test_territories_api.py,sha256=UA5j5ZqLP3L3PdddMYJdizmftUFIjj3wWHOt5M2kQtU,7527

@@ -731,9 +736,9 @@ udata/translations/pt/LC_MESSAGES/udata.mo,sha256=9sCd1MUKvtVP_sOXvK-G5v4PfWkkdA
 udata/translations/pt/LC_MESSAGES/udata.po,sha256=-eJptz9s63rjkdm-3HJi_2t70pyv3-8EuXBn-B2qI_4,48419
 udata/translations/sr/LC_MESSAGES/udata.mo,sha256=qduXntHWe__KaUxJ4JwwyGG3eSgYb1auGdNax0lS49c,29169
 udata/translations/sr/LC_MESSAGES/udata.po,sha256=6QCuLMCRjgyAvu9U7i0P19ae8fm_uStfmxHLqUO9EoY,55394
-udata-10.5.1.
-udata-10.5.1.
-udata-10.5.1.
-udata-10.5.1.
-udata-10.5.1.
-udata-10.5.1.
+udata-10.5.1.dev36043.dist-info/LICENSE,sha256=V8j_M8nAz8PvAOZQocyRDX7keai8UJ9skgmnwqETmdY,34520
+udata-10.5.1.dev36043.dist-info/METADATA,sha256=ztFjG0Y_tBYSYsclO0hDbINhP4CkTlwicVN3r6YV22E,149836
+udata-10.5.1.dev36043.dist-info/WHEEL,sha256=Kh9pAotZVRFj97E15yTA4iADqXdQfIVTHcNaZTjxeGM,110
+udata-10.5.1.dev36043.dist-info/entry_points.txt,sha256=ETvkR4r6G1duBsh_V_fGWENQy17GTFuobi95MYBAl1A,498
+udata-10.5.1.dev36043.dist-info/top_level.txt,sha256=39OCg-VWFWOq4gCKnjKNu-s3OwFlZIu_dVH8Gl6ndHw,12
+udata-10.5.1.dev36043.dist-info/RECORD,,
{udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/LICENSE
File without changes

{udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/WHEEL
File without changes

{udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/entry_points.txt
File without changes

{udata-10.5.1.dev36025.dist-info → udata-10.5.1.dev36043.dist-info}/top_level.txt
File without changes