udata 6.1.6.dev25761__py2.py3-none-any.whl → 6.1.7__py2.py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata might be problematic. Click here for more details.
- udata/__init__.py +1 -1
- udata/core/dataset/api_fields.py +20 -2
- udata/core/dataset/apiv2.py +8 -2
- udata/core/dataset/csv.py +1 -0
- udata/core/dataset/models.py +14 -0
- udata/core/dataset/rdf.py +3 -3
- udata/core/dataset/search.py +6 -2
- udata/core/discussions/api.py +4 -1
- udata/core/discussions/tasks.py +2 -2
- udata/core/organization/api_fields.py +1 -0
- udata/core/organization/forms.py +28 -0
- udata/core/organization/models.py +3 -0
- udata/core/topic/api.py +1 -0
- udata/core/topic/forms.py +1 -0
- udata/core/topic/models.py +20 -0
- udata/harvest/backends/dcat.py +10 -1
- udata/harvest/tests/test_dcat_backend.py +26 -1
- udata/models/slug_fields.py +4 -0
- udata/search/commands.py +8 -2
- udata/settings.py +7 -0
- udata/static/admin.js +19 -19
- udata/static/admin.js.map +1 -1
- udata/static/chunks/{1.01e4e2be1923d2ac99ae.js → 1.2a80f2805f0ad61574e4.js} +2 -2
- udata/static/chunks/1.2a80f2805f0ad61574e4.js.map +1 -0
- udata/static/chunks/{29.8a0027454d42f6c5ed8b.js → 29.7b551c87d15f96f30b57.js} +2 -2
- udata/static/chunks/29.7b551c87d15f96f30b57.js.map +1 -0
- udata/static/common.js +1 -1
- udata/static/common.js.map +1 -1
- udata/templates/macros/metadata.html +3 -0
- udata/templates/mail/badge_added_certified.html +1 -1
- udata/templates/mail/badge_added_public_service.html +1 -1
- udata/templates/mail/discussion_closed.html +1 -1
- udata/templates/mail/membership_request.html +1 -1
- udata/templates/mail/new_discussion.html +1 -1
- udata/templates/mail/new_discussion_comment.html +1 -1
- udata/templates/mail/user_mail_card.html +1 -1
- udata/tests/api/test_datasets_api.py +9 -1
- udata/tests/api/test_organizations_api.py +36 -2
- udata/tests/api/test_topics_api.py +1 -0
- udata/tests/frontend/test_markdown.py +15 -0
- udata/tests/test_discussions.py +80 -1
- udata/tests/test_model.py +18 -0
- udata/tests/test_topics.py +34 -0
- udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
- udata/translations/ar/LC_MESSAGES/udata.po +37 -14
- udata/translations/es/LC_MESSAGES/udata.mo +0 -0
- udata/translations/es/LC_MESSAGES/udata.po +37 -14
- udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/fr/LC_MESSAGES/udata.po +37 -14
- udata/translations/it/LC_MESSAGES/udata.mo +0 -0
- udata/translations/it/LC_MESSAGES/udata.po +37 -14
- udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
- udata/translations/pt/LC_MESSAGES/udata.po +37 -14
- udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
- udata/translations/sr/LC_MESSAGES/udata.po +37 -14
- udata/translations/udata.pot +38 -15
- {udata-6.1.6.dev25761.dist-info → udata-6.1.7.dist-info}/METADATA +108 -95
- {udata-6.1.6.dev25761.dist-info → udata-6.1.7.dist-info}/RECORD +62 -61
- {udata-6.1.6.dev25761.dist-info → udata-6.1.7.dist-info}/WHEEL +1 -1
- udata/static/chunks/1.01e4e2be1923d2ac99ae.js.map +0 -1
- udata/static/chunks/29.8a0027454d42f6c5ed8b.js.map +0 -1
- {udata-6.1.6.dev25761.dist-info → udata-6.1.7.dist-info}/LICENSE +0 -0
- {udata-6.1.6.dev25761.dist-info → udata-6.1.7.dist-info}/entry_points.txt +0 -0
- {udata-6.1.6.dev25761.dist-info → udata-6.1.7.dist-info}/top_level.txt +0 -0
udata/__init__.py
CHANGED
udata/core/dataset/api_fields.py
CHANGED
|
@@ -73,6 +73,13 @@ frequency_fields = api.model('Frequency', {
|
|
|
73
73
|
'label': fields.String(description='The frequency display name')
|
|
74
74
|
})
|
|
75
75
|
|
|
76
|
+
# Nested API model carrying a resource's site-internal creation and
# last-modification timestamps.
resource_internal_fields = api.model('ResourceInternals', {
    'created_at_internal': fields.ISODateTime(
        required=True,
        description='The resource\'s internal creation date on the site',
    ),
    'last_modified_internal': fields.ISODateTime(
        required=True,
        description='The resource\'s internal last modification date',
    ),
})
|
|
82
|
+
|
|
76
83
|
resource_fields = api.model('Resource', {
|
|
77
84
|
'id': fields.String(description='The resource unique ID', readonly=True),
|
|
78
85
|
'title': fields.String(description='The resource title', required=True),
|
|
@@ -112,6 +119,8 @@ resource_fields = api.model('Resource', {
|
|
|
112
119
|
'new page)',
|
|
113
120
|
readonly=True),
|
|
114
121
|
'schema': fields.Raw(description='Reference to the associated schema', readonly=True),
|
|
122
|
+
'internal': fields.Nested(
|
|
123
|
+
resource_internal_fields, readonly=True, description='Site internal and specific object\'s data'),
|
|
115
124
|
})
|
|
116
125
|
|
|
117
126
|
upload_fields = api.inherit('UploadedResource', resource_fields, {
|
|
@@ -162,9 +171,16 @@ DEFAULT_MASK = ','.join((
|
|
|
162
171
|
'id', 'title', 'acronym', 'slug', 'description', 'created_at', 'last_modified', 'deleted',
|
|
163
172
|
'private', 'tags', 'badges', 'resources', 'frequency', 'frequency_date', 'extras', 'harvest',
|
|
164
173
|
'metrics', 'organization', 'owner', 'temporal_coverage', 'spatial', 'license',
|
|
165
|
-
'uri', 'page', 'last_update', 'archived', 'quality'
|
|
174
|
+
'uri', 'page', 'last_update', 'archived', 'quality', 'internal'
|
|
166
175
|
))
|
|
167
176
|
|
|
177
|
+
# Nested API model carrying a dataset's site-internal creation and
# last-modification timestamps.
dataset_internal_fields = api.model('DatasetInternals', {
    'created_at_internal': fields.ISODateTime(
        required=True,
        description='The dataset\'s internal creation date on the site',
    ),
    'last_modified_internal': fields.ISODateTime(
        required=True,
        description='The dataset\'s internal last modification date',
    ),
})
|
|
183
|
+
|
|
168
184
|
dataset_fields = api.model('Dataset', {
|
|
169
185
|
'id': fields.String(description='The dataset identifier', readonly=True),
|
|
170
186
|
'title': fields.String(description='The dataset title', required=True),
|
|
@@ -174,7 +190,7 @@ dataset_fields = api.model('Dataset', {
|
|
|
174
190
|
'description': fields.Markdown(
|
|
175
191
|
description='The dataset description in markdown', required=True),
|
|
176
192
|
'created_at': fields.ISODateTime(
|
|
177
|
-
description='
|
|
193
|
+
description='This date is computed between harvested creation date if any and site\'s internal creation date' , required=True),
|
|
178
194
|
'last_modified': fields.ISODateTime(
|
|
179
195
|
description='The dataset last modification date', required=True),
|
|
180
196
|
'deleted': fields.ISODateTime(description='The deletion date if deleted'),
|
|
@@ -228,6 +244,8 @@ dataset_fields = api.model('Dataset', {
|
|
|
228
244
|
'quality': fields.Raw(description='The dataset quality', readonly=True),
|
|
229
245
|
'last_update': fields.ISODateTime(
|
|
230
246
|
description='The resources last modification date', required=True),
|
|
247
|
+
'internal': fields.Nested(
|
|
248
|
+
dataset_internal_fields, readonly=True, description='Site internal and specific object\'s data'),
|
|
231
249
|
}, mask=DEFAULT_MASK)
|
|
232
250
|
|
|
233
251
|
dataset_page_fields = api.model('DatasetPage', fields.pager(dataset_fields),
|
udata/core/dataset/apiv2.py
CHANGED
|
@@ -16,7 +16,9 @@ from .api_fields import (
|
|
|
16
16
|
user_ref_fields,
|
|
17
17
|
checksum_fields,
|
|
18
18
|
dataset_harvest_fields,
|
|
19
|
-
|
|
19
|
+
dataset_internal_fields,
|
|
20
|
+
resource_harvest_fields,
|
|
21
|
+
resource_internal_fields
|
|
20
22
|
)
|
|
21
23
|
from udata.core.spatial.api_fields import geojson
|
|
22
24
|
from .models import (
|
|
@@ -33,7 +35,7 @@ DEFAULT_MASK_APIV2 = ','.join((
|
|
|
33
35
|
'id', 'title', 'acronym', 'slug', 'description', 'created_at', 'last_modified', 'deleted',
|
|
34
36
|
'private', 'tags', 'badges', 'resources', 'community_resources', 'frequency', 'frequency_date',
|
|
35
37
|
'extras', 'metrics', 'organization', 'owner', 'temporal_coverage', 'spatial', 'license',
|
|
36
|
-
'uri', 'page', 'last_update', 'archived', 'quality', 'harvest'
|
|
38
|
+
'uri', 'page', 'last_update', 'archived', 'quality', 'harvest', 'internal'
|
|
37
39
|
))
|
|
38
40
|
|
|
39
41
|
log = logging.getLogger(__name__)
|
|
@@ -129,6 +131,8 @@ dataset_fields = apiv2.model('Dataset', {
|
|
|
129
131
|
'quality': fields.Raw(description='The dataset quality', readonly=True),
|
|
130
132
|
'last_update': fields.ISODateTime(
|
|
131
133
|
description='The resources last modification date', required=True),
|
|
134
|
+
'internal': fields.Nested(
|
|
135
|
+
dataset_internal_fields, readonly=True, description='Site internal and specific object\'s data')
|
|
132
136
|
}, mask=DEFAULT_MASK_APIV2)
|
|
133
137
|
|
|
134
138
|
|
|
@@ -164,6 +168,8 @@ apiv2.inherit('GeoJSON', geojson)
|
|
|
164
168
|
apiv2.inherit('Checksum', checksum_fields)
|
|
165
169
|
apiv2.inherit('HarvestDatasetMetadata', dataset_harvest_fields)
|
|
166
170
|
apiv2.inherit('HarvestResourceMetadata', resource_harvest_fields)
|
|
171
|
+
apiv2.inherit('DatasetInternals', dataset_internal_fields)
|
|
172
|
+
apiv2.inherit('ResourceInternals', resource_internal_fields)
|
|
167
173
|
|
|
168
174
|
|
|
169
175
|
@ns.route('/search/', endpoint='dataset_search')
|
udata/core/dataset/csv.py
CHANGED
udata/core/dataset/models.py
CHANGED
|
@@ -294,6 +294,13 @@ class ResourceMixin(object):
|
|
|
294
294
|
last_modified_internal = db.DateTimeField(default=datetime.utcnow, required=True)
|
|
295
295
|
deleted = db.DateTimeField()
|
|
296
296
|
|
|
297
|
+
@property
def internal(self):
    '''Return the internal creation and last modification dates as a dict.'''
    return dict(
        created_at_internal=self.created_at_internal,
        last_modified_internal=self.last_modified_internal,
    )
|
|
303
|
+
|
|
297
304
|
@property
|
|
298
305
|
def created_at(self):
|
|
299
306
|
return self.harvest.created_at if self.harvest and self.harvest.created_at else self.created_at_internal
|
|
@@ -827,6 +834,13 @@ class Dataset(WithMetrics, BadgeMixin, db.Owned, db.Document):
|
|
|
827
834
|
|
|
828
835
|
return result
|
|
829
836
|
|
|
837
|
+
@property
def internal(self):
    '''Return the internal creation and last modification dates as a dict.'''
    return dict(
        created_at_internal=self.created_at_internal,
        last_modified_internal=self.last_modified_internal,
    )
|
|
843
|
+
|
|
830
844
|
@property
|
|
831
845
|
def views_count(self):
|
|
832
846
|
return self.metrics.get('views', 0)
|
udata/core/dataset/rdf.py
CHANGED
|
@@ -319,15 +319,15 @@ def frequency_from_rdf(term):
|
|
|
319
319
|
except uris.ValidationError:
|
|
320
320
|
pass
|
|
321
321
|
if isinstance(term, Literal):
|
|
322
|
-
if term.toPython() in UPDATE_FREQUENCIES:
|
|
323
|
-
return term.toPython()
|
|
322
|
+
if term.toPython().lower() in UPDATE_FREQUENCIES:
|
|
323
|
+
return term.toPython().lower()
|
|
324
324
|
if isinstance(term, RdfResource):
|
|
325
325
|
term = term.identifier
|
|
326
326
|
if isinstance(term, URIRef):
|
|
327
327
|
if EUFREQ in term:
|
|
328
328
|
return EU_RDF_REQUENCIES.get(term)
|
|
329
329
|
_, _, freq = namespace_manager.compute_qname(term)
|
|
330
|
-
return freq
|
|
330
|
+
return freq.lower()
|
|
331
331
|
|
|
332
332
|
|
|
333
333
|
def mime_from_rdf(resource):
|
udata/core/dataset/search.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import datetime
|
|
2
2
|
from udata.models import (
|
|
3
|
-
Dataset, Organization, User, GeoZone, License
|
|
3
|
+
Dataset, Organization, User, GeoZone, License, Topic
|
|
4
4
|
)
|
|
5
5
|
from udata.search import (
|
|
6
6
|
ModelSearchAdapter, register,
|
|
@@ -41,6 +41,7 @@ class DatasetSearch(ModelSearchAdapter):
|
|
|
41
41
|
'schema': Filter(),
|
|
42
42
|
'temporal_coverage': TemporalCoverageFilter(),
|
|
43
43
|
'featured': BoolFilter(),
|
|
44
|
+
'topic': ModelTermsFilter(model=Topic),
|
|
44
45
|
}
|
|
45
46
|
|
|
46
47
|
@classmethod
|
|
@@ -63,6 +64,8 @@ class DatasetSearch(ModelSearchAdapter):
|
|
|
63
64
|
organization = None
|
|
64
65
|
owner = None
|
|
65
66
|
|
|
67
|
+
topics = Topic.objects(datasets=dataset)
|
|
68
|
+
|
|
66
69
|
if dataset.organization:
|
|
67
70
|
org = Organization.objects(id=dataset.organization.id).first()
|
|
68
71
|
organization = {
|
|
@@ -94,7 +97,8 @@ class DatasetSearch(ModelSearchAdapter):
|
|
|
94
97
|
'organization': organization,
|
|
95
98
|
'owner': str(owner.id) if owner else None,
|
|
96
99
|
'format': [r.format.lower() for r in dataset.resources if r.format],
|
|
97
|
-
'schema': [r.schema.get('name') for r in dataset.resources if r.schema]
|
|
100
|
+
'schema': [r.schema.get('name') for r in dataset.resources if r.schema],
|
|
101
|
+
'topics': [t.slug for t in topics if topics],
|
|
98
102
|
}
|
|
99
103
|
extras = {}
|
|
100
104
|
for key, value in dataset.extras.items():
|
udata/core/discussions/api.py
CHANGED
|
@@ -101,11 +101,14 @@ class DiscussionAPI(API):
|
|
|
101
101
|
@api.secure
|
|
102
102
|
@api.doc('comment_discussion')
|
|
103
103
|
@api.expect(comment_discussion_fields)
|
|
104
|
-
@api.response(403, 'Not allowed to close this discussion'
|
|
104
|
+
@api.response(403, 'Not allowed to close this discussion '
|
|
105
|
+
"OR can't add comments on a closed discussion")
|
|
105
106
|
@api.marshal_with(discussion_fields)
|
|
106
107
|
def post(self, id):
|
|
107
108
|
'''Add comment and optionally close a discussion given its ID'''
|
|
108
109
|
discussion = Discussion.objects.get_or_404(id=id_or_404(id))
|
|
110
|
+
if discussion.closed:
|
|
111
|
+
api.abort(403, "Can't add comments on a closed discussion")
|
|
109
112
|
form = api.validate(DiscussionCommentForm)
|
|
110
113
|
message = Message(
|
|
111
114
|
content=form.comment.data,
|
udata/core/discussions/tasks.py
CHANGED
|
@@ -41,7 +41,7 @@ def notify_new_discussion_comment(discussion_id, message=None):
|
|
|
41
41
|
if isinstance(discussion.subject, (Dataset, Reuse, Post)):
|
|
42
42
|
recipients = owner_recipients(discussion) + [
|
|
43
43
|
m.posted_by for m in discussion.discussion]
|
|
44
|
-
recipients =
|
|
44
|
+
recipients = list({u.id: u for u in recipients if u != message.posted_by}.values())
|
|
45
45
|
subject = _('%(user)s commented your discussion',
|
|
46
46
|
user=message.posted_by.fullname)
|
|
47
47
|
|
|
@@ -59,7 +59,7 @@ def notify_discussion_closed(discussion_id, message=None):
|
|
|
59
59
|
if isinstance(discussion.subject, (Dataset, Reuse, Post)):
|
|
60
60
|
recipients = owner_recipients(discussion) + [
|
|
61
61
|
m.posted_by for m in discussion.discussion]
|
|
62
|
-
recipients =
|
|
62
|
+
recipients = list({u.id: u for u in recipients if u != message.posted_by}.values())
|
|
63
63
|
subject = _('A discussion has been closed')
|
|
64
64
|
mail.send(subject, recipients, 'discussion_closed',
|
|
65
65
|
discussion=discussion, message=message)
|
|
@@ -61,6 +61,7 @@ org_fields = api.model('Organization', {
|
|
|
61
61
|
required=True),
|
|
62
62
|
'description': fields.Markdown(
|
|
63
63
|
description='The organization description in Markdown', required=True),
|
|
64
|
+
'business_number_id': fields.String(description='The organization\'s business identification number.'),
|
|
64
65
|
'created_at': fields.ISODateTime(
|
|
65
66
|
description='The organization creation date', readonly=True),
|
|
66
67
|
'last_modified': fields.ISODateTime(
|
udata/core/organization/forms.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
from flask import current_app
|
|
2
|
+
|
|
1
3
|
from udata.auth import current_user
|
|
2
4
|
from udata.forms import Form, ModelForm, fields, validators
|
|
3
5
|
from udata.i18n import lazy_gettext as _
|
|
@@ -15,6 +17,30 @@ __all__ = (
|
|
|
15
17
|
)
|
|
16
18
|
|
|
17
19
|
|
|
20
|
+
def org_bid_check(form, field):
    '''
    Validate an organization business identification number.

    An empty value is accepted, and nothing is checked unless the
    `ORG_BID_FORMAT` setting is `siret`. For the `siret` format the value
    must be exactly 14 ASCII digits and its first 9 digits (the SIREN part)
    must pass the checksum below.

    :param form: the form being validated (unused)
    :param field: the field whose `data` is validated
    :raises validators.ValidationError: if the value is not a valid siret
    '''
    if not field.data:
        return
    # Business ID checks are country dependent.
    # The following one is suitable for France.
    if current_app.config.get('ORG_BID_FORMAT') != 'siret':
        return

    siret_number = str(field.data)
    # Length control is done here instead of using a WTForm validator
    # because the field must remain optional.
    if len(siret_number) != 14:
        raise validators.ValidationError(_('A siret number is made of 14 digits'))
    # Siret exception for the French postal service, whose business number
    # does not match the algorithm.
    if siret_number == '35600000000048':
        return
    # Check all 14 characters, not only the 9 used by the checksum, and
    # only accept ASCII digits (`int()` alone also accepts other Unicode
    # decimal characters).
    if not set(siret_number) <= set('0123456789'):
        raise validators.ValidationError(_('A siret number is only made of digits'))

    # Checksum verification on the SIREN part only, the first 9 digits:
    # every second digit is doubled, and 9 is subtracted from any result
    # above 9.
    digits = [int(char) for char in siret_number[:9]]
    digits[1::2] = [digit * 2 for digit in digits[1::2]]
    total = sum(digit - 9 if digit > 9 else digit for digit in digits)
    if total % 10 != 0:
        raise validators.ValidationError(_('Invalid Siret number'))
|
|
42
|
+
|
|
43
|
+
|
|
18
44
|
class OrganizationForm(ModelForm):
|
|
19
45
|
model_class = Organization
|
|
20
46
|
|
|
@@ -27,6 +53,8 @@ class OrganizationForm(ModelForm):
|
|
|
27
53
|
url = fields.URLField(
|
|
28
54
|
_('Website'), description=_('The organization website URL'))
|
|
29
55
|
logo = fields.ImageField(_('Logo'), sizes=LOGO_SIZES)
|
|
56
|
+
business_number_id = fields.StringField(_('Business id'), [org_bid_check],
|
|
57
|
+
description=_('Business identification number'))
|
|
30
58
|
|
|
31
59
|
deleted = fields.DateTimeField()
|
|
32
60
|
extras = fields.ExtrasField()
|
|
@@ -40,6 +40,8 @@ CERTIFIED = 'certified'
|
|
|
40
40
|
TITLE_SIZE_LIMIT = 350
|
|
41
41
|
DESCRIPTION_SIZE_LIMIT = 100000
|
|
42
42
|
|
|
43
|
+
ORG_BID_SIZE_LIMIT = 14
|
|
44
|
+
|
|
43
45
|
|
|
44
46
|
class Team(db.EmbeddedDocument):
|
|
45
47
|
name = db.StringField(required=True)
|
|
@@ -104,6 +106,7 @@ class Organization(WithMetrics, BadgeMixin, db.Datetimed, db.Document):
|
|
|
104
106
|
image_url = db.StringField()
|
|
105
107
|
logo = db.ImageField(fs=avatars, basename=default_image_basename,
|
|
106
108
|
max_size=LOGO_MAX_SIZE, thumbnails=LOGO_SIZES)
|
|
109
|
+
business_number_id = db.StringField(max_length=ORG_BID_SIZE_LIMIT)
|
|
107
110
|
|
|
108
111
|
members = db.ListField(db.EmbeddedDocumentField(Member))
|
|
109
112
|
teams = db.ListField(db.EmbeddedDocumentField(Team))
|
udata/core/topic/api.py
CHANGED
|
@@ -41,6 +41,7 @@ topic_fields = api.model('Topic', {
|
|
|
41
41
|
'page': fields.UrlFor(
|
|
42
42
|
'topics.display', lambda o: {'topic': o},
|
|
43
43
|
description='The topic page URL', readonly=True, fallback_endpoint='api.topic'),
|
|
44
|
+
'extras': fields.Raw(description='Extras attributes as key-value pairs'),
|
|
44
45
|
}, mask='*,datasets{id,title,uri,page},reuses{id,title, image, image_thumbnail,uri,page}')
|
|
45
46
|
|
|
46
47
|
|
udata/core/topic/forms.py
CHANGED
udata/core/topic/models.py
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
1
|
from flask import url_for
|
|
2
2
|
|
|
3
|
+
from mongoengine.signals import pre_save
|
|
3
4
|
from udata.models import db
|
|
5
|
+
from udata.search import reindex
|
|
6
|
+
from udata.tasks import as_task_param
|
|
4
7
|
|
|
5
8
|
|
|
6
9
|
__all__ = ('Topic', )
|
|
@@ -23,10 +26,27 @@ class Topic(db.Document):
|
|
|
23
26
|
owner = db.ReferenceField('User')
|
|
24
27
|
featured = db.BooleanField()
|
|
25
28
|
private = db.BooleanField()
|
|
29
|
+
extras = db.ExtrasField()
|
|
26
30
|
|
|
27
31
|
def __str__(self):
|
|
28
32
|
return self.name
|
|
29
33
|
|
|
34
|
+
@classmethod
def pre_save(cls, sender, document, **kwargs):
    '''
    Queue a reindex task for each dataset affected by this save.

    When a stored version of the document exists, the affected datasets
    are those present in only one of the stored and the current datasets
    lists. When none exists (creation), every dataset of the document is
    reindexed.
    '''
    try:
        # The lookup raises `DoesNotExist` at creation time, when there is
        # no original state to compare against.
        stored = sender.objects.get(id=document.id)
        before = set(stored.datasets)
        after = set(document.datasets)
        affected = before ^ after
    except cls.DoesNotExist:
        affected = document.datasets
    for item in affected:
        task_args = as_task_param(item)
        reindex.delay(*task_args)
|
|
46
|
+
|
|
30
47
|
@property
|
|
31
48
|
def display_url(self):
|
|
32
49
|
return url_for('topics.display', topic=self)
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
pre_save.connect(Topic.pre_save, sender=Topic)
|
udata/harvest/backends/dcat.py
CHANGED
|
@@ -35,6 +35,12 @@ KNOWN_PAGINATION = (
|
|
|
35
35
|
(HYDRA.PagedCollection, HYDRA.nextPage)
|
|
36
36
|
)
|
|
37
37
|
|
|
38
|
+
# Essential URIs known to fail, each mapped to the URI used to patch it.
# See https://github.com/etalab/data.gouv.fr/issues/1151
URIS_TO_REPLACE = {
    'https://project-open-data.cio.gov/v1.1/schema/catalog.jsonld':
        'https://gist.githubusercontent.com/maudetes/f019586185d6f59dcfb07f97148a1973/raw/585c3c7bf602b5a4e635b137257d0619792e2c1f/gistfile1.txt',  # noqa
}
|
|
43
|
+
|
|
38
44
|
|
|
39
45
|
def extract_graph(source, target, node, specs):
|
|
40
46
|
for p, o in source.predicate_objects(node):
|
|
@@ -80,7 +86,10 @@ class DcatBackend(BaseBackend):
|
|
|
80
86
|
page = 0
|
|
81
87
|
while url:
|
|
82
88
|
subgraph = Graph(namespace_manager=namespace_manager)
|
|
83
|
-
|
|
89
|
+
data = requests.get(url).text
|
|
90
|
+
for old_uri, new_uri in URIS_TO_REPLACE.items():
|
|
91
|
+
data = data.replace(old_uri, new_uri)
|
|
92
|
+
subgraph.parse(data=data, format=fmt)
|
|
84
93
|
|
|
85
94
|
url = None
|
|
86
95
|
for cls, prop in KNOWN_PAGINATION:
|
|
@@ -10,6 +10,7 @@ from udata.core.organization.factories import OrganizationFactory
|
|
|
10
10
|
from udata.core.dataset.factories import LicenseFactory
|
|
11
11
|
|
|
12
12
|
from .factories import HarvestSourceFactory
|
|
13
|
+
from ..backends.dcat import URIS_TO_REPLACE
|
|
13
14
|
from .. import actions
|
|
14
15
|
|
|
15
16
|
log = logging.getLogger(__name__)
|
|
@@ -333,7 +334,7 @@ class DcatBackendTest:
|
|
|
333
334
|
assert 'geodesy' in dataset.tags # support dcat:theme
|
|
334
335
|
assert dataset.license.id == 'fr-lo'
|
|
335
336
|
assert len(dataset.resources) == 1
|
|
336
|
-
assert dataset.description.startswith(
|
|
337
|
+
assert dataset.description.startswith("Data from the 'National network")
|
|
337
338
|
assert dataset.harvest is not None
|
|
338
339
|
assert dataset.harvest.dct_identifier == '0437a976-cff1-4fa6-807a-c23006df2f8f'
|
|
339
340
|
assert dataset.harvest.remote_id == '0437a976-cff1-4fa6-807a-c23006df2f8f'
|
|
@@ -383,3 +384,27 @@ class DcatBackendTest:
|
|
|
383
384
|
error = job.errors[0]
|
|
384
385
|
expected = 'Unable to detect format from extension or mime type'
|
|
385
386
|
assert error.message == expected
|
|
387
|
+
|
|
388
|
+
def test_use_replaced_uris(self, rmock, mocker):
    '''
    Harvest a catalog whose `@context` is a URI registered in
    `URIS_TO_REPLACE` and expect the job to be done with no items.
    '''
    failing_context = 'http://example.org/this-url-does-not-exist'
    replacements = {failing_context: 'https://json-ld.org/contexts/person.jsonld'}
    mocker.patch.dict(URIS_TO_REPLACE, replacements)

    url = DCAT_URL_PATTERN.format(path='', domain=TEST_DOMAIN)
    catalog = {
        '@context': failing_context,
        '@type': 'dcat:Catalog',
        'dataset': [],
    }
    rmock.get(url, json=catalog)
    rmock.head(url, headers={'Content-Type': 'application/json'})

    org = OrganizationFactory()
    source = HarvestSourceFactory(backend='dcat', url=url, organization=org)
    actions.run(source.slug)

    source.reload()

    job = source.get_last_job()
    assert len(job.items) == 0
    assert job.status == 'done'
|
udata/models/slug_fields.py
CHANGED
|
@@ -163,6 +163,10 @@ def populate_slug(instance, field):
|
|
|
163
163
|
return qs(**{field.db_field: s}).clear_cls_query().limit(1).count(True) > 0
|
|
164
164
|
|
|
165
165
|
while exists(slug):
|
|
166
|
+
# keep space for index suffix, trim slug if needed
|
|
167
|
+
slug_overflow = len('{0}-{1}'.format(base_slug, index)) - field.max_length
|
|
168
|
+
if slug_overflow >= 1:
|
|
169
|
+
base_slug = base_slug[:-slug_overflow]
|
|
166
170
|
slug = '{0}-{1}'.format(base_slug, index)
|
|
167
171
|
index += 1
|
|
168
172
|
|
udata/search/commands.py
CHANGED
|
@@ -52,7 +52,10 @@ def index_model(adapter, start, reindex=False, from_datetime=None):
|
|
|
52
52
|
log.info('Indexing %s objects', model.__name__)
|
|
53
53
|
qs = model.objects
|
|
54
54
|
if from_datetime:
|
|
55
|
-
|
|
55
|
+
date_property = ('last_modified_internal'
|
|
56
|
+
if model.__name__.lower() in ['dataset']
|
|
57
|
+
else 'last_modified')
|
|
58
|
+
qs = qs.filter(**{f'{date_property}__gte': from_datetime})
|
|
56
59
|
index_name = adapter.model.__name__.lower()
|
|
57
60
|
if reindex:
|
|
58
61
|
index_name += '-' + default_index_suffix_name(start)
|
|
@@ -101,7 +104,10 @@ def finalize_reindex(models, start):
|
|
|
101
104
|
modified_since_reindex = 0
|
|
102
105
|
for adapter in iter_adapters():
|
|
103
106
|
if not models or adapter.model.__name__.lower() in models:
|
|
104
|
-
|
|
107
|
+
date_property = ('last_modified_internal'
|
|
108
|
+
if adapter.model.__name__.lower() in ['dataset']
|
|
109
|
+
else 'last_modified')
|
|
110
|
+
modified_since_reindex += adapter.model.objects(**{f'{date_property}__gte': start}).count()
|
|
105
111
|
|
|
106
112
|
log.warning(
|
|
107
113
|
f'{modified_since_reindex} documents have been modified since reindexation start. '
|
udata/settings.py
CHANGED
|
@@ -187,6 +187,8 @@ class Defaults(object):
|
|
|
187
187
|
'tbody',
|
|
188
188
|
'thead',
|
|
189
189
|
'tfooter',
|
|
190
|
+
'details',
|
|
191
|
+
'summary'
|
|
190
192
|
# 'title',
|
|
191
193
|
]
|
|
192
194
|
|
|
@@ -362,6 +364,11 @@ class Defaults(object):
|
|
|
362
364
|
# Default pagination size on listing
|
|
363
365
|
POST_DEFAULT_PAGINATION = 20
|
|
364
366
|
|
|
367
|
+
# Organization settings
|
|
368
|
+
###########################################################################
|
|
369
|
+
# The business identification format to use for validation
|
|
370
|
+
ORG_BID_FORMAT = 'siret'
|
|
371
|
+
|
|
365
372
|
# Dataset settings
|
|
366
373
|
###########################################################################
|
|
367
374
|
# Max number of resources to display uncollapsed in dataset view
|