udata 8.0.2.dev29304__py2.py3-none-any.whl → 9.1.0__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (86) hide show
  1. udata/__init__.py +1 -1
  2. udata/api/__init__.py +2 -0
  3. udata/api/commands.py +0 -2
  4. udata/api_fields.py +41 -3
  5. udata/commands/db.py +88 -48
  6. udata/core/dataservices/factories.py +33 -0
  7. udata/core/dataservices/models.py +42 -4
  8. udata/core/dataservices/rdf.py +106 -0
  9. udata/core/dataset/csv.py +8 -1
  10. udata/core/dataset/models.py +1 -2
  11. udata/core/dataset/rdf.py +37 -128
  12. udata/core/discussions/models.py +20 -0
  13. udata/core/organization/csv.py +5 -3
  14. udata/core/reports/__init__.py +0 -0
  15. udata/core/reports/api.py +44 -0
  16. udata/core/reports/constants.py +30 -0
  17. udata/core/reports/models.py +58 -0
  18. udata/core/reuse/csv.py +3 -0
  19. udata/core/site/api.py +33 -2
  20. udata/core/site/rdf.py +6 -1
  21. udata/core/spam/models.py +6 -0
  22. udata/core/topic/models.py +3 -2
  23. udata/core/topic/parsers.py +3 -2
  24. udata/core/user/apiv2.py +28 -0
  25. udata/db/__init__.py +0 -0
  26. udata/db/tasks.py +6 -0
  27. udata/features/notifications/__init__.py +0 -1
  28. udata/forms/fields.py +2 -2
  29. udata/harvest/api.py +19 -1
  30. udata/harvest/backends/base.py +118 -10
  31. udata/harvest/backends/dcat.py +28 -7
  32. udata/harvest/models.py +6 -0
  33. udata/harvest/tests/dcat/bnodes.xml +13 -2
  34. udata/harvest/tests/test_dcat_backend.py +21 -0
  35. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +35 -0
  36. udata/models/__init__.py +1 -0
  37. udata/rdf.py +113 -2
  38. udata/routing.py +1 -1
  39. udata/settings.py +3 -1
  40. udata/static/admin.js +17 -17
  41. udata/static/admin.js.map +1 -1
  42. udata/static/chunks/{18.ad41fb75ac4226e1f3ce.js → 18.1922fd0b2b7fad122991.js} +3 -3
  43. udata/static/chunks/18.1922fd0b2b7fad122991.js.map +1 -0
  44. udata/static/chunks/{7.11ac4de064ae59691d49.js → 7.e2106342e94ee09393b1.js} +2 -2
  45. udata/static/chunks/7.e2106342e94ee09393b1.js.map +1 -0
  46. udata/static/common.js +1 -1
  47. udata/static/common.js.map +1 -1
  48. udata/storage/s3.py +3 -3
  49. udata/tasks.py +1 -0
  50. udata/tests/api/test_dataservices_api.py +26 -2
  51. udata/tests/api/test_datasets_api.py +1 -1
  52. udata/tests/api/test_reports_api.py +87 -0
  53. udata/tests/apiv2/test_me_api.py +40 -0
  54. udata/tests/dataset/test_dataset_rdf.py +19 -1
  55. udata/tests/frontend/test_auth.py +1 -4
  56. udata/tests/organization/test_csv_adapter.py +0 -1
  57. udata/tests/plugin.py +2 -0
  58. udata/tests/site/test_site_api.py +0 -1
  59. udata/tests/site/test_site_rdf.py +66 -0
  60. udata/tests/test_discussions.py +24 -34
  61. udata/tests/test_model.py +3 -2
  62. udata/tests/test_utils.py +1 -1
  63. udata/translations/ar/LC_MESSAGES/udata.mo +0 -0
  64. udata/translations/ar/LC_MESSAGES/udata.po +128 -64
  65. udata/translations/de/LC_MESSAGES/udata.mo +0 -0
  66. udata/translations/de/LC_MESSAGES/udata.po +128 -64
  67. udata/translations/es/LC_MESSAGES/udata.mo +0 -0
  68. udata/translations/es/LC_MESSAGES/udata.po +128 -64
  69. udata/translations/fr/LC_MESSAGES/udata.mo +0 -0
  70. udata/translations/fr/LC_MESSAGES/udata.po +128 -64
  71. udata/translations/it/LC_MESSAGES/udata.mo +0 -0
  72. udata/translations/it/LC_MESSAGES/udata.po +128 -64
  73. udata/translations/pt/LC_MESSAGES/udata.mo +0 -0
  74. udata/translations/pt/LC_MESSAGES/udata.po +128 -64
  75. udata/translations/sr/LC_MESSAGES/udata.mo +0 -0
  76. udata/translations/sr/LC_MESSAGES/udata.po +128 -64
  77. udata/translations/udata.pot +129 -65
  78. udata/uris.py +14 -13
  79. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/METADATA +26 -7
  80. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/RECORD +84 -72
  81. udata/static/chunks/18.ad41fb75ac4226e1f3ce.js.map +0 -1
  82. udata/static/chunks/7.11ac4de064ae59691d49.js.map +0 -1
  83. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/LICENSE +0 -0
  84. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/WHEEL +0 -0
  85. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/entry_points.txt +0 -0
  86. {udata-8.0.2.dev29304.dist-info → udata-9.1.0.dist-info}/top_level.txt +0 -0
udata/__init__.py CHANGED
@@ -4,5 +4,5 @@
4
4
  udata
5
5
  '''
6
6
 
7
- __version__ = '8.0.2.dev'
7
+ __version__ = '9.1.0'
8
8
  __description__ = 'Open data portal'
udata/api/__init__.py CHANGED
@@ -323,6 +323,7 @@ def init_app(app):
323
323
  import udata.core.activity.api # noqa
324
324
  import udata.core.spatial.api # noqa
325
325
  import udata.core.user.api # noqa
326
+ import udata.core.user.apiv2 # noqa
326
327
  import udata.core.dataset.api # noqa
327
328
  import udata.core.dataset.apiv2 # noqa
328
329
  import udata.core.dataservices.api # noqa
@@ -333,6 +334,7 @@ def init_app(app):
333
334
  import udata.core.organization.apiv2 # noqa
334
335
  import udata.core.followers.api # noqa
335
336
  import udata.core.jobs.api # noqa
337
+ import udata.core.reports.api # noqa
336
338
  import udata.core.site.api # noqa
337
339
  import udata.core.tags.api # noqa
338
340
  import udata.core.topic.api # noqa
udata/api/commands.py CHANGED
@@ -1,6 +1,5 @@
1
1
  import logging
2
2
  import os
3
- import time
4
3
 
5
4
  import click
6
5
 
@@ -19,7 +18,6 @@ log = logging.getLogger(__name__)
19
18
  @cli.group('api')
20
19
  def grp():
21
20
  '''API related operations'''
22
- pass
23
21
 
24
22
 
25
23
  def json_to_file(data, filename, pretty=False):
udata/api_fields.py CHANGED
@@ -38,6 +38,9 @@ def convert_db_to_field(key, field, info = {}):
38
38
  constructor = restx_fields.String
39
39
  params['min_length'] = field.min_length
40
40
  params['max_length'] = field.max_length
41
+ params['enum'] = field.choices
42
+ elif isinstance(field, mongo_fields.ObjectIdField):
43
+ constructor = restx_fields.String
41
44
  elif isinstance(field, mongo_fields.FloatField):
42
45
  constructor = restx_fields.Float
43
46
  params['min'] = field.min # TODO min_value?
@@ -70,10 +73,14 @@ def convert_db_to_field(key, field, info = {}):
70
73
  constructor_write = restx_fields.String
71
74
  elif isinstance(field, mongo_fields.EmbeddedDocumentField):
72
75
  nested_fields = info.get('nested_fields')
73
- if nested_fields is None:
74
- raise ValueError(f"EmbeddedDocumentField `{key}` requires a `nested_fields` param to serialize/deserialize.")
76
+ if nested_fields is not None:
77
+ constructor = lambda **kwargs: restx_fields.Nested(nested_fields, **kwargs)
78
+ elif hasattr(field.document_type_obj, '__read_fields__'):
79
+ constructor_read = lambda **kwargs: restx_fields.Nested(field.document_type_obj.__read_fields__, **kwargs)
80
+ constructor_write = lambda **kwargs: restx_fields.Nested(field.document_type_obj.__write_fields__, **kwargs)
81
+ else:
82
+ raise ValueError(f"EmbeddedDocumentField `{key}` requires a `nested_fields` param to serialize/deserialize or a `@generate_fields()` definition.")
75
83
 
76
- constructor = lambda **kwargs: restx_fields.Nested(nested_fields, **kwargs)
77
84
  else:
78
85
  raise ValueError(f"Unsupported MongoEngine field type {field.__class__.__name__}")
79
86
 
@@ -96,6 +103,7 @@ def generate_fields(**kwargs):
96
103
  read_fields = {}
97
104
  write_fields = {}
98
105
  sortables = []
106
+ filterables = []
99
107
 
100
108
  read_fields['id'] = restx_fields.String(required=True)
101
109
 
@@ -106,6 +114,23 @@ def generate_fields(**kwargs):
106
114
  if info.get('sortable', False):
107
115
  sortables.append(key)
108
116
 
117
+ filterable = info.get('filterable', None)
118
+ if filterable is not None:
119
+ if 'key' not in filterable:
120
+ filterable['key'] = key
121
+ if 'column' not in filterable:
122
+ filterable['column'] = key
123
+
124
+ if 'constraints' not in filterable:
125
+ filterable['constraints'] = []
126
+ if isinstance(field, mongo_fields.ReferenceField) or (isinstance(field, mongo_fields.ListField) and isinstance(field.field, mongo_fields.ReferenceField)):
127
+ filterable['constraints'].append('objectid')
128
+
129
+ # We may add more information later here:
130
+ # - type of mongo query to execute (right now only simple =)
131
+
132
+ filterables.append(filterable)
133
+
109
134
  read, write = convert_db_to_field(key, field)
110
135
 
111
136
  if read:
@@ -159,6 +184,9 @@ def generate_fields(**kwargs):
159
184
  choices = sortables + ['-' + k for k in sortables]
160
185
  parser.add_argument('sort', type=str, location='args', choices=choices, help='The field (and direction) on which sorting apply')
161
186
 
187
+ for filterable in filterables:
188
+ parser.add_argument(filterable['key'], type=str, location='args')
189
+
162
190
  cls.__index_parser__ = parser
163
191
  def apply_sort_filters_and_pagination(base_query):
164
192
  args = cls.__index_parser__.parse_args()
@@ -166,6 +194,16 @@ def generate_fields(**kwargs):
166
194
  if sortables and args['sort']:
167
195
  base_query = base_query.order_by(args['sort'])
168
196
 
197
+ for filterable in filterables:
198
+ if args.get(filterable['key']):
199
+ for constraint in filterable['constraints']:
200
+ if constraint == 'objectid' and not ObjectId.is_valid(args[filterable['key']]):
201
+ api.abort(400, f'`{filterable["key"]}` must be an identifier')
202
+
203
+ base_query = base_query.filter(**{
204
+ filterable['column']: args[filterable['key']],
205
+ })
206
+
169
207
  if paginable:
170
208
  base_query = base_query.paginate(args['page'], args['page_size'])
171
209
 
udata/commands/db.py CHANGED
@@ -1,7 +1,10 @@
1
1
  import collections
2
+ from itertools import groupby
2
3
  import logging
3
4
  import os
5
+ import traceback
4
6
 
7
+ from bson import DBRef
5
8
  import click
6
9
  import mongoengine
7
10
 
@@ -135,8 +138,14 @@ def display_op(op):
135
138
  echo('{label:.<70} [{date}]'.format(label=label, date=timestamp))
136
139
  format_output(op['output'], success=op['success'], traceback=op.get('traceback'))
137
140
 
138
-
139
141
  def check_references(models_to_check):
142
+ # Collect the error messages on a class attribute so the nested helper below can append to them.
143
+ class Log: errors = []
144
+
145
+ def print_and_save(text: str):
146
+ Log.errors.append(text.strip())
147
+ print(text)
148
+
140
149
  errors = collections.defaultdict(int)
141
150
 
142
151
  _models = []
@@ -147,7 +156,7 @@ def check_references(models_to_check):
147
156
  ]
148
157
 
149
158
  references = []
150
- for model in _models:
159
+ for model in set(_models):
151
160
  if model.__name__ == 'Activity':
152
161
  print(f'Skipping Activity model, scheduled for deprecation')
153
162
  continue
@@ -240,53 +249,84 @@ def check_references(models_to_check):
240
249
  print(f'- {reference["repr"]}({reference["destination"]}) — {reference["type"]}')
241
250
  print('')
242
251
 
243
- for reference in references:
244
- print(f'- {reference["repr"]}({reference["destination"]}) {reference["type"]}...')
245
- query = {f'{reference["name"]}__ne': None}
246
- qs = reference['model'].objects(**query).no_cache().all()
247
- try:
248
- for obj in qs:
249
- if reference['type'] == 'direct':
250
- try:
251
- _ = getattr(obj, reference['name'])
252
- except mongoengine.errors.DoesNotExist:
253
- errors[reference["repr"]] += 1
254
- elif reference['type'] == 'list':
255
- for sub in getattr(obj, reference['name']):
256
- try:
257
- _ = sub.id
258
- except mongoengine.errors.DoesNotExist:
259
- errors[reference["repr"]] += 1
260
- elif reference['type'] == 'embed_list':
261
- p1, p2 = reference['name'].split('__')
262
- for sub in getattr(obj, p1):
263
- try:
264
- getattr(sub, p2)
265
- except mongoengine.errors.DoesNotExist:
266
- errors[reference["repr"]] += 1
267
- elif reference['type'] == 'embed':
268
- p1, p2 = reference['name'].split('__')
269
- sub = getattr(obj, p1)
270
- try:
271
- getattr(sub, p2)
272
- except mongoengine.errors.DoesNotExist:
273
- errors[reference["repr"]] += 1
274
- elif reference['type'] == 'embed_list_ref':
275
- p1, p2 = reference['name'].split('__')
276
- sub = getattr(getattr(obj, p1), p2)
277
- for obj in sub:
278
- try:
279
- obj.id
280
- except mongoengine.errors.DoesNotExist:
281
- errors[reference["repr"]] += 1
282
- else:
283
- print(f'Unknown ref type {reference["type"]}')
284
- print('Errors:', errors[reference["repr"]])
285
- except mongoengine.errors.FieldDoesNotExist as e:
286
- print('[ERROR]', e)
287
-
288
- print(f'\n Total errors: {sum(errors.values())}')
252
+ total = 0
253
+ for model, model_references in groupby(references, lambda i: i["model"]):
254
+ model_references = list(model_references)
255
+ count = model.objects.count()
256
+ print(f'- doing {count} {model.__name__}…')
257
+ errors[model] = {}
258
+
259
+ qs = model.objects().no_cache().all()
260
+ with click.progressbar(qs, length=count) as models:
261
+ for obj in models:
262
+ for reference in model_references:
263
+ key = f'\t- {reference["repr"]}({reference["destination"]}) {reference["type"]}…'
264
+ if key not in errors[model]:
265
+ errors[model][key] = 0
289
266
 
267
+ try:
268
+ if reference['type'] == 'direct':
269
+ try:
270
+ _ = getattr(obj, reference['name'])
271
+ except mongoengine.errors.DoesNotExist:
272
+ errors[model][key] += 1
273
+ print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for `{reference["name"]}`')
274
+ elif reference['type'] == 'list':
275
+ attr_list = getattr(obj, reference['name'], [])
276
+ for i, sub in enumerate(attr_list):
277
+ # If it's still an instance of DBRef it means that it failed to
278
+ # dereference the ID.
279
+ if isinstance(sub, DBRef):
280
+ errors[model][key] += 1
281
+ print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {reference["name"]}[{i}]')
282
+ elif reference['type'] == 'embed_list':
283
+ p1, p2 = reference['name'].split('__')
284
+ attr_list = getattr(obj, p1, [])
285
+ for i, sub in enumerate(attr_list):
286
+ try:
287
+ getattr(sub, p2)
288
+ except mongoengine.errors.DoesNotExist:
289
+ errors[model][key] += 1
290
+ print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {p1}[{i}].{p2}')
291
+ elif reference['type'] == 'embed':
292
+ p1, p2 = reference['name'].split('__')
293
+ sub = getattr(obj, p1)
294
+ if sub is None: continue
295
+ try:
296
+ getattr(sub, p2)
297
+ except mongoengine.errors.DoesNotExist:
298
+ errors[model][key] += 1
299
+ print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {p1}.{p2}')
300
+ elif reference['type'] == 'embed_list_ref':
301
+ p1, p2 = reference['name'].split('__')
302
+ a = getattr(obj, p1)
303
+ if a is None: continue
304
+ sub = getattr(a, p2, [])
305
+ for i, child in enumerate(sub):
306
+ # If it's still an instance of DBRef it means that it failed to
307
+ # dereference the ID.
308
+ if isinstance(child, DBRef):
309
+ errors[model][key] += 1
310
+ print_and_save(f'\t{model.__name__}#{obj.id} have a broken reference for {p1}.{p2}[{i}]')
311
+ else:
312
+ print_and_save(f'Unknown ref type {reference["type"]}')
313
+ except mongoengine.errors.FieldDoesNotExist as e:
314
+ print_and_save(f'[ERROR for {model.__name__} {obj.id}] {traceback.format_exc()}')
315
+
316
+ for key, nb_errors in errors[model].items():
317
+ print(f'{key}: {nb_errors}')
318
+ total += nb_errors
319
+
320
+ print(f'\n Total errors: {total}')
321
+
322
+ if total > 0:
323
+ try:
324
+ import sentry_sdk
325
+ with sentry_sdk.push_scope() as scope:
326
+ scope.set_extra("errors", Log.errors)
327
+ sentry_sdk.capture_message(f"{total} integrity errors", "fatal")
328
+ except ImportError:
329
+ print("`sentry_sdk` not installed. The errors weren't reported")
290
330
 
291
331
  @grp.command()
292
332
  @click.option('--models', multiple=True, default=[], help='Model(s) to check')
@@ -0,0 +1,33 @@
1
+ import factory
2
+
3
+ from udata.core.dataservices.models import Dataservice, HarvestMetadata
4
+ from udata.core.organization.factories import OrganizationFactory
5
+ from udata.factories import ModelFactory
6
+
7
+ class HarvestMetadataFactory(ModelFactory):
8
+ class Meta:
9
+ model = HarvestMetadata
10
+
11
+ backend = 'csw-dcat'
12
+ domain = 'data.gouv.fr'
13
+
14
+ source_id = factory.Faker('unique_string')
15
+ source_url = factory.Faker('url')
16
+
17
+ remote_id = factory.Faker('unique_string')
18
+ remote_url = factory.Faker('url')
19
+
20
+ uri = factory.Faker('url')
21
+
22
+ class DataserviceFactory(ModelFactory):
23
+ class Meta:
24
+ model = Dataservice
25
+
26
+ title = factory.Faker('sentence')
27
+ description = factory.Faker('text')
28
+ base_api_url = factory.Faker('url')
29
+
30
+ class Params:
31
+ org = factory.Trait(
32
+ organization=factory.SubFactory(OrganizationFactory),
33
+ )
@@ -31,6 +31,35 @@ class DataserviceQuerySet(OwnedQuerySet):
31
31
  db.Q(deleted_at__ne=None) |
32
32
  db.Q(archived_at__ne=None))
33
33
 
34
+ @generate_fields()
35
+ class HarvestMetadata(db.EmbeddedDocument):
36
+ backend = field(db.StringField())
37
+ domain = field(db.StringField())
38
+
39
+ source_id = field(db.StringField())
40
+ source_url = field(db.URLField())
41
+
42
+ remote_id = field(db.StringField())
43
+ remote_url = field(db.URLField())
44
+
45
+ # If the node ID is a `URIRef` it means it links to something external; if it's not a `URIRef` it's often an
46
+ # auto-generated ID just to link multiple RDF nodes together. When exporting as RDF to other catalogs, we
47
+ # want to re-use this node ID (only if it's not auto-generated) to improve compatibility.
48
+ uri = field(
49
+ db.URLField(),
50
+ description="RDF node ID if it's an `URIRef`. `None` if it's not present or if it's a random auto-generated ID inside the graph.",
51
+ )
52
+
53
+ created_at = field(
54
+ db.DateTimeField(),
55
+ description="Date of the creation as provided by the harvested catalog"
56
+ )
57
+ last_update = field(
58
+ db.DateTimeField(),
59
+ description="Date of the last harvesting"
60
+ )
61
+ archived_at = field(db.DateTimeField())
62
+
34
63
  @generate_fields()
35
64
  class Dataservice(WithMetrics, Owned, db.Document):
36
65
  meta = {
@@ -111,17 +140,26 @@ class Dataservice(WithMetrics, Owned, db.Document):
111
140
  db.ListField(
112
141
  field(
113
142
  db.ReferenceField(Dataset),
114
- nested_fields=datasets_api_fields.dataset_fields,
143
+ nested_fields=datasets_api_fields.dataset_ref_fields,
115
144
  )
116
- )
145
+ ),
146
+ filterable={
147
+ 'key': 'dataset',
148
+ },
149
+ )
150
+
151
+ harvest = field(
152
+ db.EmbeddedDocumentField(HarvestMetadata),
153
+ readonly=True,
117
154
  )
118
155
 
119
156
  @function_field(description="Link to the API endpoint for this dataservice")
120
157
  def self_api_url(self):
121
158
  return endpoint_for('api.dataservice', dataservice=self, _external=True)
122
159
 
123
- def self_web_url():
124
- pass
160
+ @function_field(description="Link to the udata web page for this dataservice")
161
+ def self_web_url(self):
162
+ return endpoint_for('dataservices.show', dataservice=self, _external=True)
125
163
 
126
164
  # TODO
127
165
  # frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
@@ -0,0 +1,106 @@
1
+
2
+ from rdflib import RDF, BNode, Graph, Literal, URIRef
3
+
4
+ from udata.core.dataservices.models import Dataservice, HarvestMetadata as HarvestDataserviceMetadata
5
+ from udata.core.dataset.models import Dataset, License
6
+ from udata.core.dataset.rdf import dataset_to_graph_id, sanitize_html
7
+ from udata.rdf import namespace_manager, DCAT, DCT, contact_point_from_rdf, rdf_value, remote_url_from_rdf, themes_from_rdf, url_from_rdf
8
+ from udata.uris import endpoint_for
9
+
10
+ def dataservice_from_rdf(graph: Graph, dataservice: Dataservice, node, all_datasets: list[Dataset]) -> Dataservice :
11
+ '''
12
+ Create or update a dataservice from an RDF/DCAT graph
13
+ '''
14
+ if node is None: # Assume first match is the only match
15
+ node = graph.value(predicate=RDF.type, object=DCAT.DataService)
16
+
17
+ d = graph.resource(node)
18
+
19
+ dataservice.title = rdf_value(d, DCT.title)
20
+ dataservice.description = sanitize_html(d.value(DCT.description) or d.value(DCT.abstract))
21
+
22
+ dataservice.base_api_url = url_from_rdf(d, DCAT.endpointURL)
23
+ dataservice.endpoint_description_url = url_from_rdf(d, DCAT.endpointDescription)
24
+
25
+ dataservice.contact_point = contact_point_from_rdf(d, dataservice) or dataservice.contact_point
26
+
27
+ datasets = []
28
+ for dataset_node in d.objects(DCAT.servesDataset):
29
+ id = dataset_node.value(DCT.identifier)
30
+ dataset = next((d for d in all_datasets if d is not None and d.harvest.remote_id == id), None)
31
+
32
+ if dataset is None:
33
+ # We try with `endswith` because the Europe XSLT has problems with IDs: sometimes they are prefixed with the domain of the catalog, sometimes not.
34
+ dataset = next((d for d in all_datasets if d is not None and d.harvest.remote_id.endswith(id)), None)
35
+
36
+ if dataset is not None:
37
+ datasets.append(dataset.id)
38
+
39
+ if datasets:
40
+ dataservice.datasets = datasets
41
+
42
+ license = rdf_value(d, DCT.license)
43
+ if license is not None:
44
+ dataservice.license = License.guess(license)
45
+
46
+ if not dataservice.harvest:
47
+ dataservice.harvest = HarvestDataserviceMetadata()
48
+
49
+ dataservice.harvest.uri = d.identifier.toPython() if isinstance(d.identifier, URIRef) else None
50
+ dataservice.harvest.remote_url = remote_url_from_rdf(d)
51
+ dataservice.harvest.created_at = rdf_value(d, DCT.issued)
52
+ dataservice.metadata_modified_at = rdf_value(d, DCT.modified)
53
+
54
+ dataservice.tags = themes_from_rdf(d)
55
+
56
+ return dataservice
57
+
58
+
59
+ def dataservice_to_rdf(dataservice: Dataservice, graph=None):
60
+ '''
61
+ Map a dataservice domain model to a DCAT/RDF graph
62
+ '''
63
+ # Use the unlocalized permalink to the dataservice as URI when available
64
+ # unless there is already an upstream URI
65
+ if dataservice.harvest and dataservice.harvest.uri:
66
+ id = URIRef(dataservice.harvest.uri)
67
+ elif dataservice.id:
68
+ id = URIRef(endpoint_for('dataservices.show_redirect', 'api.dataservice',
69
+ dataservice=dataservice.id, _external=True))
70
+ else:
71
+ # Should not happen in production. Some tests only
72
+ # `build()` a dataservice without saving it to the DB.
73
+ id = BNode()
74
+
75
+ # Expose upstream identifier if present
76
+ if dataservice.harvest:
77
+ identifier = dataservice.harvest.remote_id
78
+ else:
79
+ identifier = dataservice.id
80
+ graph = graph or Graph(namespace_manager=namespace_manager)
81
+
82
+ d = graph.resource(id)
83
+ d.set(RDF.type, DCAT.DataService)
84
+ d.set(DCT.identifier, Literal(identifier))
85
+ d.set(DCT.title, Literal(dataservice.title))
86
+ d.set(DCT.description, Literal(dataservice.description))
87
+ d.set(DCT.issued, Literal(dataservice.created_at))
88
+
89
+ if dataservice.base_api_url:
90
+ d.set(DCAT.endpointURL, Literal(dataservice.base_api_url))
91
+
92
+ if dataservice.endpoint_description_url:
93
+ d.set(DCAT.endpointDescription, Literal(dataservice.endpoint_description_url))
94
+
95
+ for tag in dataservice.tags:
96
+ d.add(DCAT.keyword, Literal(tag))
97
+
98
+ # `dataset_to_graph_id(dataset)` URIRef may not exist in the current page
99
+ # but should exist in the catalog somewhere. Maybe we should create a Node
100
+ # with some basic information about this dataset (but this will return a page
101
+ # with more datasets than the page size… and could be problematic when processing the
102
+ # correct Node with all the information in a future page)
103
+ for dataset in dataservice.datasets:
104
+ d.add(DCAT.servesDataset, dataset_to_graph_id(dataset))
105
+
106
+ return d
udata/core/dataset/csv.py CHANGED
@@ -19,6 +19,9 @@ class DatasetCsvAdapter(csv.Adapter):
19
19
  ('url', 'external_url'),
20
20
  ('organization', 'organization.name'),
21
21
  ('organization_id', 'organization.id'),
22
+ ('owner', 'owner.slug'), # in case it's owned by a user, or introduce 'owner_type'?
23
+ ('owner_id', 'owner.id'),
24
+ # 'contact_point', # ?
22
25
  'description',
23
26
  'frequency',
24
27
  'license',
@@ -26,19 +29,20 @@ class DatasetCsvAdapter(csv.Adapter):
26
29
  'temporal_coverage.end',
27
30
  'spatial.granularity',
28
31
  ('spatial.zones', serialize_spatial_zones),
29
- 'private',
30
32
  ('featured', lambda o: o.featured or False),
31
33
  'created_at',
32
34
  'last_modified',
33
35
  ('tags', lambda o: ','.join(o.tags)),
34
36
  ('archived', lambda o: o.archived or False),
35
37
  ('resources_count', lambda o: len(o.resources)),
38
+ ('main_resources_count', lambda o: len([r for r in o.resources if r.type == 'main'])),
36
39
  'downloads',
37
40
  ('harvest.backend', lambda r: r.harvest and r.harvest.backend),
38
41
  ('harvest.domain', lambda r: r.harvest and r.harvest.domain),
39
42
  ('harvest.created_at', lambda r: r.harvest and r.harvest.created_at),
40
43
  ('harvest.modified_at', lambda r: r.harvest and r.harvest.modified_at),
41
44
  ('quality_score', lambda o: format(o.quality['score'], '.2f')),
45
+ # schema? what is the schema of a dataset?
42
46
  )
43
47
 
44
48
  def dynamic_fields(self):
@@ -85,6 +89,9 @@ class ResourcesCsvAdapter(csv.NestedAdapter):
85
89
  ('downloads', lambda o: int(o.metrics.get('views', 0))),
86
90
  ('harvest.created_at', lambda o: o.harvest and o.harvest.created_at),
87
91
  ('harvest.modified_at', lambda o: o.harvest and o.harvest.modified_at),
92
+ ('schema_name', 'schema.name'),
93
+ ('schema_version', 'schema.version'),
94
+ ('preview_url', lambda o: o.preview_url or False),
88
95
  )
89
96
  attribute = 'resources'
90
97
 
@@ -13,7 +13,6 @@ from pydoc import locate
13
13
  from stringdist import rdlevenshtein
14
14
  from werkzeug.utils import cached_property
15
15
  import requests
16
- from typing import Optional, Tuple
17
16
 
18
17
  from udata.app import cache
19
18
  from udata.core import storages
@@ -964,7 +963,7 @@ class ResourceSchema(object):
964
963
  def assignable_schemas():
965
964
  return [s for s in ResourceSchema.all() if s.get('schema_type') not in NON_ASSIGNABLE_SCHEMA_TYPES]
966
965
 
967
- def get_existing_schema_info_by_url(url: str) -> Optional[Tuple[str, Optional[str]]]:
966
+ def get_existing_schema_info_by_url(url: str) -> tuple[str, str | None] | None:
968
967
  '''
969
968
  Returns the name and the version if exists
970
969
  '''