udata 10.1.3.dev34224__py2.py3-none-any.whl → 10.1.3.dev34275__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (33) hide show
  1. udata/commands/fixtures.py +1 -1
  2. udata/core/dataservices/constants.py +11 -0
  3. udata/core/dataservices/csv.py +3 -3
  4. udata/core/dataservices/models.py +26 -12
  5. udata/core/dataservices/rdf.py +5 -3
  6. udata/core/dataservices/search.py +13 -5
  7. udata/core/metrics/commands.py +20 -1
  8. udata/harvest/tests/test_dcat_backend.py +1 -1
  9. udata/migrations/2025-01-05-dataservices-fields-changes.py +136 -0
  10. udata/static/chunks/{11.55ab79044cda0271b595.js → 11.822f6ccb39c92c796d13.js} +3 -3
  11. udata/static/chunks/{11.55ab79044cda0271b595.js.map → 11.822f6ccb39c92c796d13.js.map} +1 -1
  12. udata/static/chunks/{13.2d06442dd9a05d9777b5.js → 13.d9c1735d14038b94c17e.js} +2 -2
  13. udata/static/chunks/{13.2d06442dd9a05d9777b5.js.map → 13.d9c1735d14038b94c17e.js.map} +1 -1
  14. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js → 17.81c57c0dedf812e43013.js} +2 -2
  15. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js.map → 17.81c57c0dedf812e43013.js.map} +1 -1
  16. udata/static/chunks/{19.f03a102365af4315f9db.js → 19.ba0bb2baa40e899d440b.js} +3 -3
  17. udata/static/chunks/{19.f03a102365af4315f9db.js.map → 19.ba0bb2baa40e899d440b.js.map} +1 -1
  18. udata/static/chunks/{5.5660483641193b7f8295.js → 5.0652a860afda96795a53.js} +3 -3
  19. udata/static/chunks/{5.5660483641193b7f8295.js.map → 5.0652a860afda96795a53.js.map} +1 -1
  20. udata/static/chunks/{6.30dce49d17db07600b06.js → 6.92d7c2ec6d20005774ef.js} +3 -3
  21. udata/static/chunks/{6.30dce49d17db07600b06.js.map → 6.92d7c2ec6d20005774ef.js.map} +1 -1
  22. udata/static/chunks/{8.b58fcd977fcaf3415571.js → 8.0f42630e6d8ff782928e.js} +2 -2
  23. udata/static/chunks/{8.b58fcd977fcaf3415571.js.map → 8.0f42630e6d8ff782928e.js.map} +1 -1
  24. udata/static/common.js +1 -1
  25. udata/static/common.js.map +1 -1
  26. udata/tests/api/test_dataservices_api.py +41 -2
  27. udata/tests/dataservice/test_csv_adapter.py +2 -0
  28. {udata-10.1.3.dev34224.dist-info → udata-10.1.3.dev34275.dist-info}/METADATA +3 -1
  29. {udata-10.1.3.dev34224.dist-info → udata-10.1.3.dev34275.dist-info}/RECORD +33 -31
  30. {udata-10.1.3.dev34224.dist-info → udata-10.1.3.dev34275.dist-info}/LICENSE +0 -0
  31. {udata-10.1.3.dev34224.dist-info → udata-10.1.3.dev34275.dist-info}/WHEEL +0 -0
  32. {udata-10.1.3.dev34224.dist-info → udata-10.1.3.dev34275.dist-info}/entry_points.txt +0 -0
  33. {udata-10.1.3.dev34224.dist-info → udata-10.1.3.dev34275.dist-info}/top_level.txt +0 -0
@@ -39,7 +39,7 @@ COMMUNITY_RES_URL = "/api/1/datasets/community_resources"
39
39
  DISCUSSION_URL = "/api/1/discussions"
40
40
 
41
41
 
42
- DEFAULT_FIXTURE_FILE_TAG: str = "v5.0.0"
42
+ DEFAULT_FIXTURE_FILE_TAG: str = "v6.0.0"
43
43
  DEFAULT_FIXTURE_FILE: str = f"https://raw.githubusercontent.com/opendatateam/udata-fixtures/{DEFAULT_FIXTURE_FILE_TAG}/results.json" # noqa
44
44
 
45
45
  DEFAULT_FIXTURES_RESULTS_FILENAME: str = "results.json"
@@ -0,0 +1,11 @@
1
+ DATASERVICE_FORMATS = ["REST", "WMS", "WSL"]
2
+
3
+
4
+ DATASERVICE_ACCESS_TYPE_OPEN = "open"
5
+ DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT = "open_with_account"
6
+ DATASERVICE_ACCESS_TYPE_RESTRICTED = "restricted"
7
+ DATASERVICE_ACCESS_TYPES = [
8
+ DATASERVICE_ACCESS_TYPE_OPEN,
9
+ DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
10
+ DATASERVICE_ACCESS_TYPE_RESTRICTED,
11
+ ]
@@ -13,13 +13,13 @@ class DataserviceCsvAdapter(csv.Adapter):
13
13
  ("url", lambda d: d.self_web_url()),
14
14
  "description",
15
15
  "base_api_url",
16
- "endpoint_description_url",
16
+ "machine_documentation_url",
17
+ "technical_documentation_url",
17
18
  "business_documentation_url",
18
19
  "authorization_request_url",
19
20
  "availability",
20
21
  "rate_limiting",
21
- "is_restricted",
22
- "has_token",
22
+ "access_type",
23
23
  "license",
24
24
  ("organization", "organization.name"),
25
25
  ("organization_id", "organization.id"),
@@ -8,6 +8,7 @@ from mongoengine.signals import post_save
8
8
  import udata.core.contact_point.api_fields as contact_api_fields
9
9
  import udata.core.dataset.api_fields as datasets_api_fields
10
10
  from udata.api_fields import field, function_field, generate_fields
11
+ from udata.core.dataservices.constants import DATASERVICE_ACCESS_TYPES, DATASERVICE_FORMATS
11
12
  from udata.core.dataset.models import Dataset
12
13
  from udata.core.metrics.models import WithMetrics
13
14
  from udata.core.owned import Owned, OwnedQuerySet
@@ -24,8 +25,6 @@ from udata.uris import endpoint_for
24
25
  # "spatial"
25
26
  # "temporal_coverage"
26
27
 
27
- DATASERVICE_FORMATS = ["REST", "WMS", "WSL"]
28
-
29
28
 
30
29
  class DataserviceQuerySet(OwnedQuerySet):
31
30
  def visible(self):
@@ -100,11 +99,17 @@ class HarvestMetadata(db.EmbeddedDocument):
100
99
  @generate_fields(
101
100
  searchable=True,
102
101
  additional_filters={"organization_badge": "organization.badges"},
102
+ additional_sorts=[
103
+ {"key": "followers", "value": "metrics.followers"},
104
+ {"key": "views", "value": "metrics.views"},
105
+ ],
103
106
  )
104
107
  class Dataservice(WithMetrics, Owned, db.Document):
105
108
  meta = {
106
109
  "indexes": [
107
110
  "$title",
111
+ "metrics.followers",
112
+ "metrics.views",
108
113
  ]
109
114
  + Owned.meta["indexes"],
110
115
  "queryset_class": DataserviceQuerySet,
@@ -132,13 +137,22 @@ class Dataservice(WithMetrics, Owned, db.Document):
132
137
  )
133
138
  description = field(db.StringField(default=""), description="In markdown")
134
139
  base_api_url = field(db.URLField(), sortable=True)
135
- endpoint_description_url = field(db.URLField())
140
+
141
+ machine_documentation_url = field(
142
+ db.URLField(), description="Swagger link, OpenAPI format, WMS XML…"
143
+ )
144
+ technical_documentation_url = field(db.URLField(), description="HTML version of a Swagger…")
136
145
  business_documentation_url = field(db.URLField())
137
- authorization_request_url = field(db.URLField())
138
- availability = field(db.FloatField(min=0, max=100), example="99.99")
146
+
139
147
  rate_limiting = field(db.StringField())
140
- is_restricted = field(db.BooleanField(), filterable={})
141
- has_token = field(db.BooleanField())
148
+ rate_limiting_url = field(db.URLField())
149
+
150
+ availability = field(db.FloatField(min=0, max=100), example="99.99")
151
+ availability_url = field(db.URLField())
152
+
153
+ access_type = field(db.StringField(choices=DATASERVICE_ACCESS_TYPES), filterable={})
154
+ authorization_request_url = field(db.URLField())
155
+
142
156
  format = field(db.StringField(choices=DATASERVICE_FORMATS))
143
157
 
144
158
  license = field(
@@ -223,11 +237,11 @@ class Dataservice(WithMetrics, Owned, db.Document):
223
237
  def self_web_url(self):
224
238
  return endpoint_for("dataservices.show", dataservice=self, _external=True)
225
239
 
226
- # TODO
227
- # frequency = db.StringField(choices=list(UPDATE_FREQUENCIES.keys()))
228
- # temporal_coverage = db.EmbeddedDocumentField(db.DateRange)
229
- # spatial = db.EmbeddedDocumentField(SpatialCoverage)
230
- # harvest = db.EmbeddedDocumentField(HarvestDatasetMetadata)
240
+ __metrics_keys__ = [
241
+ "discussions",
242
+ "followers",
243
+ "views",
244
+ ]
231
245
 
232
246
  @property
233
247
  def is_hidden(self):
@@ -42,7 +42,9 @@ def dataservice_from_rdf(
42
42
  dataservice.description = sanitize_html(d.value(DCT.description) or d.value(DCT.abstract))
43
43
 
44
44
  dataservice.base_api_url = url_from_rdf(d, DCAT.endpointURL)
45
- dataservice.endpoint_description_url = url_from_rdf(d, DCAT.endpointDescription)
45
+
46
+ # TODO detect if it's human-readable or not?
47
+ dataservice.machine_documentation_url = url_from_rdf(d, DCAT.endpointDescription)
46
48
 
47
49
  roles = [ # Imbricated list of contact points for each role
48
50
  contact_points_from_rdf(d, rdf_entity, role, dataservice)
@@ -145,8 +147,8 @@ def dataservice_to_rdf(dataservice: Dataservice, graph=None):
145
147
  ),
146
148
  )
147
149
 
148
- if dataservice.endpoint_description_url:
149
- d.set(DCAT.endpointDescription, URIRef(dataservice.endpoint_description_url))
150
+ if dataservice.machine_documentation_url:
151
+ d.set(DCAT.endpointDescription, URIRef(dataservice.machine_documentation_url))
150
152
 
151
153
  # Add DCAT-AP HVD properties if the dataservice is tagged hvd.
152
154
  # See https://semiceu.github.io/DCAT-AP/releases/2.2.0-hvd/
@@ -5,6 +5,11 @@ from flask_restx.inputs import boolean
5
5
 
6
6
  from udata.api import api
7
7
  from udata.api.parsers import ModelApiParser
8
+ from udata.core.dataservices.constants import (
9
+ DATASERVICE_ACCESS_TYPE_OPEN,
10
+ DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
11
+ DATASERVICE_ACCESS_TYPE_RESTRICTED,
12
+ )
8
13
  from udata.models import Dataservice, Organization, User
9
14
  from udata.search import (
10
15
  BoolFilter,
@@ -47,7 +52,11 @@ class DataserviceApiParser(ModelApiParser):
47
52
  api.abort(400, "Organization arg must be an identifier")
48
53
  dataservices = dataservices.filter(organization=args["organization"])
49
54
  if "is_restricted" in args:
50
- dataservices = dataservices.filter(is_restricted=boolean(args["is_restricted"]))
55
+ dataservices = dataservices.filter(
56
+ access_type__in=[DATASERVICE_ACCESS_TYPE_RESTRICTED]
57
+ if boolean(args["is_restricted"])
58
+ else [DATASERVICE_ACCESS_TYPE_OPEN, DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT]
59
+ )
51
60
  return dataservices
52
61
 
53
62
 
@@ -56,9 +65,7 @@ class DataserviceSearch(ModelSearchAdapter):
56
65
  model = Dataservice
57
66
  search_url = "dataservices/"
58
67
 
59
- sorts = {
60
- "created": "created_at",
61
- }
68
+ sorts = {"created": "created_at", "views": "views", "followers": "followers"}
62
69
 
63
70
  filters = {
64
71
  "tag": Filter(),
@@ -114,5 +121,6 @@ class DataserviceSearch(ModelSearchAdapter):
114
121
  "tags": dataservice.tags,
115
122
  "extras": extras,
116
123
  "followers": dataservice.metrics.get("followers", 0),
117
- "is_restricted": dataservice.is_restricted or False,
124
+ "is_restricted": dataservice.access_type == DATASERVICE_ACCESS_TYPE_RESTRICTED,
125
+ "views": dataservice.metrics.get("views", 0),
118
126
  }
@@ -4,6 +4,7 @@ import click
4
4
  from flask import current_app
5
5
 
6
6
  from udata.commands import cli, success
7
+ from udata.core.dataservices.models import Dataservice
7
8
  from udata.models import Dataset, GeoZone, Organization, Reuse, Site, User
8
9
 
9
10
  log = logging.getLogger(__name__)
@@ -19,6 +20,7 @@ def grp():
19
20
  @click.option("-s", "--site", is_flag=True, help="Update site metrics")
20
21
  @click.option("-o", "--organizations", is_flag=True, help="Compute organizations metrics")
21
22
  @click.option("-d", "--datasets", is_flag=True, help="Compute datasets metrics")
23
+ @click.option("--dataservices", is_flag=True, help="Compute dataservices metrics")
22
24
  @click.option("-r", "--reuses", is_flag=True, help="Compute reuses metrics")
23
25
  @click.option("-u", "--users", is_flag=True, help="Compute users metrics")
24
26
  @click.option("-g", "--geozones", is_flag=True, help="Compute geo levels metrics")
@@ -28,12 +30,13 @@ def update(
28
30
  organizations=False,
29
31
  users=False,
30
32
  datasets=False,
33
+ dataservices=False,
31
34
  reuses=False,
32
35
  geozones=False,
33
36
  drop=False,
34
37
  ):
35
38
  """Update all metrics for the current date"""
36
- do_all = not any((site, organizations, users, datasets, reuses, geozones))
39
+ do_all = not any((site, organizations, users, datasets, dataservices, reuses, geozones))
37
40
 
38
41
  if do_all or site:
39
42
  log.info("Update site metrics")
@@ -75,6 +78,22 @@ def update(
75
78
  log.info(f"Error during update: {e}")
76
79
  continue
77
80
 
81
+ if do_all or dataservices:
82
+ log.info("Update dataservices metrics")
83
+ all_dataservices = Dataservice.objects.visible().timeout(False)
84
+ with click.progressbar(
85
+ all_dataservices, length=Dataservice.objects.count()
86
+ ) as dataservice_bar:
87
+ for dataservice in dataservice_bar:
88
+ try:
89
+ if drop:
90
+ dataservice.metrics.clear()
91
+ dataservice.count_discussions()
92
+ dataservice.count_followers()
93
+ except Exception as e:
94
+ log.info(f"Error during update: {e}")
95
+ continue
96
+
78
97
  if do_all or reuses:
79
98
  log.info("Update reuses metrics")
80
99
  all_reuses = Reuse.objects.visible().timeout(False)
@@ -179,7 +179,7 @@ class DcatBackendTest:
179
179
  assert dataservices[0].title == "Explore API v2"
180
180
  assert dataservices[0].base_api_url == "https://data.paris2024.org/api/explore/v2.1/"
181
181
  assert (
182
- dataservices[0].endpoint_description_url
182
+ dataservices[0].machine_documentation_url
183
183
  == "https://data.paris2024.org/api/explore/v2.1/swagger.json"
184
184
  )
185
185
  assert (
@@ -0,0 +1,136 @@
1
+ """
2
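+ This migration converts legacy dataservice fields: `is_restricted` and `has_token` are replaced by `access_type`, and `endpoint_description_url` is moved to either `machine_documentation_url` or `technical_documentation_url`; the legacy fields are then unset.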
3
+ """
4
+
5
+ import logging
6
+ from typing import List
7
+
8
+ from mongoengine.connection import get_db
9
+
10
+ from udata.core.dataservices.constants import (
11
+ DATASERVICE_ACCESS_TYPE_OPEN,
12
+ DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT,
13
+ DATASERVICE_ACCESS_TYPE_RESTRICTED,
14
+ )
15
+ from udata.core.dataservices.models import Dataservice
16
+
17
+ log = logging.getLogger(__name__)
18
+
19
+
20
+ def migrate(db):
21
+ log.info("Preprocessing dataservices…")
22
+
23
+ count = get_db().dataservice.update_many(
24
+ filter={
25
+ "$or": [
26
+ {"is_restricted": None},
27
+ {"is_restricted": {"$exists": False}},
28
+ ]
29
+ },
30
+ update={"$set": {"is_restricted": False}},
31
+ )
32
+ log.info(
33
+ f"\tConverted {count.modified_count} dataservices from `is_restricted=None` to `is_restricted=False`"
34
+ )
35
+
36
+ count = get_db().dataservice.update_many(
37
+ filter={
38
+ "$or": [
39
+ {"has_token": None},
40
+ {"has_token": {"$exists": False}},
41
+ ]
42
+ },
43
+ update={"$set": {"has_token": False}},
44
+ )
45
+ log.info(
46
+ f"\tConverted {count.modified_count} dataservices from `has_token=None` to `has_token=False`"
47
+ )
48
+
49
+ for dataservice in get_db().dataservice.find({"is_restricted": True, "has_token": False}):
50
+ log.info(
51
+ f"\tDataservice #{dataservice['_id']} {dataservice['title']} is restricted but without token. (will be set to access_type={DATASERVICE_ACCESS_TYPE_RESTRICTED})"
52
+ )
53
+
54
+ log.info("Processing dataservices…")
55
+
56
+ count = get_db().dataservice.update_many(
57
+ filter={
58
+ "is_restricted": True,
59
+ # `has_token` could be True or False, we don't care
60
+ },
61
+ update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_RESTRICTED}},
62
+ )
63
+ log.info(
64
+ f"\t{count.modified_count} restricted dataservices to DATASERVICE_ACCESS_TYPE_RESTRICTED"
65
+ )
66
+
67
+ count = get_db().dataservice.update_many(
68
+ filter={
69
+ "is_restricted": False,
70
+ "has_token": True,
71
+ },
72
+ update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT}},
73
+ )
74
+ log.info(
75
+ f"\t{count.modified_count} dataservices not restricted but with token to DATASERVICE_ACCESS_TYPE_OPEN_WITH_ACCOUNT"
76
+ )
77
+
78
+ count = get_db().dataservice.update_many(
79
+ filter={
80
+ "is_restricted": False,
81
+ "has_token": False,
82
+ },
83
+ update={"$set": {"access_type": DATASERVICE_ACCESS_TYPE_OPEN}},
84
+ )
85
+ log.info(f"\t{count.modified_count} open dataservices to DATASERVICE_ACCESS_TYPE_OPEN")
86
+
87
+ dataservices: List[Dataservice] = get_db().dataservice.find()
88
+ for dataservice in dataservices:
89
+ if (
90
+ "endpoint_description_url" not in dataservice
91
+ or not dataservice["endpoint_description_url"]
92
+ ):
93
+ continue
94
+
95
+ to_set = {}
96
+ if (
97
+ dataservice["endpoint_description_url"].endswith(".json")
98
+ or dataservice["endpoint_description_url"].endswith(".yaml")
99
+ or dataservice["endpoint_description_url"].endswith(".yml")
100
+ or dataservice["endpoint_description_url"].endswith("?format=openapi-json")
101
+ or "getcapabilities" in dataservice["endpoint_description_url"].lower()
102
+ or "getresourcedescription" in dataservice["endpoint_description_url"].lower()
103
+ or dataservice["endpoint_description_url"].startswith(
104
+ "https://api.insee.fr/catalogue/api-docs/carbon.super"
105
+ )
106
+ ):
107
+ # log.info(f"[MACHINE] {dataservice["endpoint_description_url"]}")
108
+ to_set["machine_documentation_url"] = dataservice["endpoint_description_url"]
109
+ else:
110
+ # log.info(f"[ HUMAN ] {dataservice["endpoint_description_url"]}")
111
+ to_set["technical_documentation_url"] = dataservice["endpoint_description_url"]
112
+
113
+ result = get_db().dataservice.update_one(
114
+ filter={
115
+ "_id": dataservice["_id"],
116
+ },
117
+ update={"$set": to_set},
118
+ )
119
+ assert result.modified_count == 1
120
+ assert result.matched_count == 1
121
+
122
+ log.info("Postprocessing dataservices…")
123
+
124
+ count = get_db().dataservice.update_many(
125
+ {},
126
+ {
127
+ "$unset": {
128
+ "endpoint_description_url": "",
129
+ "is_restricted": "",
130
+ "has_token": "",
131
+ }
132
+ },
133
+ )
134
+ log.info(f"\tUnset legacy fields on {count.modified_count} dataservices")
135
+
136
+ log.info("Done")