udata 10.4.2.dev35427__py2.py3-none-any.whl → 10.4.2.dev35451__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of udata might be problematic.

Files changed (29)
  1. udata/core/metrics/commands.py +1 -0
  2. udata/core/metrics/helpers.py +102 -0
  3. udata/core/metrics/tasks.py +1 -0
  4. udata/core/site/models.py +33 -0
  5. udata/core/user/activities.py +0 -5
  6. udata/migrations/2025-05-22-purge-duplicate-activities.py +101 -0
  7. udata/settings.py +4 -0
  8. udata/static/chunks/{10.471164b2a9fe15614797.js → 10.8ca60413647062717b1e.js} +3 -3
  9. udata/static/chunks/{10.471164b2a9fe15614797.js.map → 10.8ca60413647062717b1e.js.map} +1 -1
  10. udata/static/chunks/{11.0f04e49a40a0a381bcce.js → 11.51d706fb9521c16976bc.js} +3 -3
  11. udata/static/chunks/{11.0f04e49a40a0a381bcce.js.map → 11.51d706fb9521c16976bc.js.map} +1 -1
  12. udata/static/chunks/{13.2d06442dd9a05d9777b5.js → 13.d9c1735d14038b94c17e.js} +2 -2
  13. udata/static/chunks/{13.2d06442dd9a05d9777b5.js.map → 13.d9c1735d14038b94c17e.js.map} +1 -1
  14. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js → 17.81c57c0dedf812e43013.js} +2 -2
  15. udata/static/chunks/{17.e8e4caaad5cb0cc0bacc.js.map → 17.81c57c0dedf812e43013.js.map} +1 -1
  16. udata/static/chunks/{19.df16abde17a42033a7f8.js → 19.a348a5fff8fe2801e52a.js} +3 -3
  17. udata/static/chunks/{19.df16abde17a42033a7f8.js.map → 19.a348a5fff8fe2801e52a.js.map} +1 -1
  18. udata/static/chunks/{8.0f42630e6d8ff782928e.js → 8.462bb3029de008497675.js} +2 -2
  19. udata/static/chunks/{8.0f42630e6d8ff782928e.js.map → 8.462bb3029de008497675.js.map} +1 -1
  20. udata/static/chunks/{9.07515e5187f475bce828.js → 9.033d7e190ca9e226a5d0.js} +3 -3
  21. udata/static/chunks/{9.07515e5187f475bce828.js.map → 9.033d7e190ca9e226a5d0.js.map} +1 -1
  22. udata/static/common.js +1 -1
  23. udata/static/common.js.map +1 -1
  24. {udata-10.4.2.dev35427.dist-info → udata-10.4.2.dev35451.dist-info}/METADATA +3 -1
  25. {udata-10.4.2.dev35427.dist-info → udata-10.4.2.dev35451.dist-info}/RECORD +29 -27
  26. {udata-10.4.2.dev35427.dist-info → udata-10.4.2.dev35451.dist-info}/LICENSE +0 -0
  27. {udata-10.4.2.dev35427.dist-info → udata-10.4.2.dev35451.dist-info}/WHEEL +0 -0
  28. {udata-10.4.2.dev35427.dist-info → udata-10.4.2.dev35451.dist-info}/entry_points.txt +0 -0
  29. {udata-10.4.2.dev35427.dist-info → udata-10.4.2.dev35451.dist-info}/top_level.txt +0 -0
udata/core/metrics/commands.py CHANGED

@@ -60,6 +60,7 @@ def update(
         site.count_max_org_followers()
         site.count_max_org_reuses()
         site.count_max_org_datasets()
+        site.count_stock_metrics()
     except Exception as e:
         log.info(f"Error during update: {e}")
 
udata/core/metrics/helpers.py ADDED

@@ -0,0 +1,102 @@
+import logging
+from collections import OrderedDict
+from datetime import datetime, timedelta
+from typing import Dict, List, Union
+
+import requests
+from bson import ObjectId
+from dateutil.rrule import MONTHLY, rrule
+from flask import current_app
+from mongoengine import QuerySet
+from pymongo.command_cursor import CommandCursor
+
+log = logging.getLogger(__name__)
+
+
+def get_last_13_months() -> List[str]:
+    dstart = datetime.today().replace(day=1) - timedelta(days=365)
+    months = rrule(freq=MONTHLY, count=13, dtstart=dstart)
+    return [month.strftime("%Y-%m") for month in months]
+
+
+def compute_monthly_metrics(metrics_data: List[Dict], metrics_labels: List[str]) -> OrderedDict:
+    # Initialize default monthly_metrics
+    monthly_metrics = OrderedDict(
+        (month, {label: 0 for label in metrics_labels}) for month in get_last_13_months()
+    )
+    # Update monthly_metrics with metrics_data values
+    for entry in metrics_data:
+        entry_month = entry["metric_month"]
+        if entry_month in monthly_metrics:
+            for metric_label in metrics_labels:
+                label = f"monthly_{metric_label}"
+                monthly_metrics[entry_month][metric_label] = entry.get(label) or 0
+    return monthly_metrics
+
+
+def metrics_by_label(monthly_metrics: Dict, metrics_labels: List[str]) -> List[OrderedDict]:
+    metrics_by_label = []
+    for label in metrics_labels:
+        metrics_by_label.append(
+            OrderedDict((month, monthly_metrics[month][label]) for month in monthly_metrics)
+        )
+    return metrics_by_label
+
+
+def get_metrics_for_model(
+    model: str, id: Union[str, ObjectId, None], metrics_labels: List[str]
+) -> List[OrderedDict]:
+    """
+    Get distant metrics for a particular model object
+    """
+    if not current_app.config["METRICS_API"]:
+        # TODO: How to best deal with no METRICS_API, prevent calling or return empty?
+        # raise ValueError("missing config METRICS_API to use this function")
+        return [{} for _ in range(len(metrics_labels))]
+    models = model + "s" if id else model  # TODO: not clean of a hack
+    model_metrics_api = f"{current_app.config['METRICS_API']}/{models}/data/"
+    try:
+        params = {"metric_month__sort": "desc"}
+        if id:
+            params[f"{model}_id__exact"] = id
+        res = requests.get(model_metrics_api, params)
+        res.raise_for_status()
+        monthly_metrics = compute_monthly_metrics(res.json()["data"], metrics_labels)
+        return metrics_by_label(monthly_metrics, metrics_labels)
+    except requests.exceptions.RequestException as e:
+        log.exception(f"Error while getting metrics for {model}({id}): {e}")
+        return [{} for _ in range(len(metrics_labels))]
+
+
+def compute_monthly_aggregated_metrics(aggregation_res: CommandCursor) -> OrderedDict:
+    monthly_metrics = OrderedDict((month, 0) for month in get_last_13_months())
+    for monthly_count in aggregation_res:
+        year, month = monthly_count["_id"].split("-")
+        monthly_label = year + "-" + month.zfill(2)
+        if monthly_label in monthly_metrics:
+            monthly_metrics[monthly_label] = monthly_count["count"]
+    return monthly_metrics
+
+
+def get_stock_metrics(objects: QuerySet, date_label: str = "created_at") -> OrderedDict:
+    """
+    Get stock metrics for a particular model object
+    """
+    pipeline = [
+        {"$match": {date_label: {"$gte": datetime.now() - timedelta(days=365)}}},
+        {
+            "$group": {
+                "_id": {
+                    "$concat": [
+                        {"$substr": [{"$year": f"${date_label}"}, 0, 4]},
+                        "-",
+                        {"$substr": [{"$month": f"${date_label}"}, 0, 12]},
+                    ]
+                },
+                "count": {"$sum": 1},
+            }
+        },
+    ]
+    aggregation_res = objects.aggregate(*pipeline)
+
+    return compute_monthly_aggregated_metrics(aggregation_res)
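
For context, a minimal usage sketch of the two entry points above (not part of the diff; it assumes a Flask application context for `current_app` and the querysets udata already exposes, and simply mirrors the calls made elsewhere in this release):

    # Hypothetical usage sketch — names mirror the new helpers.py above.
    from udata.core.metrics.helpers import get_metrics_for_model, get_stock_metrics
    from udata.models import Dataset

    # Remote metrics: one OrderedDict per requested label, keyed "YYYY-MM" over the
    # last 13 months; falls back to empty dicts when METRICS_API is unset or the
    # HTTP call fails.
    visits = get_metrics_for_model("site", None, ["visit_dataset"])[0]

    # Stock metrics: monthly creation counts over the last year, aggregated in
    # MongoDB on the given date field.
    datasets_by_months = get_stock_metrics(
        Dataset.objects().visible(), date_label="created_at_internal"
    )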
udata/core/metrics/tasks.py CHANGED

@@ -24,5 +24,6 @@ def compute_site_metrics(self):
     site.count_max_org_followers()
     site.count_max_org_reuses()
     site.count_max_org_datasets()
+    site.count_stock_metrics()
     # Sending signal
     on_site_metrics_computed.send(site)
udata/core/site/models.py CHANGED

@@ -3,6 +3,7 @@ from werkzeug.local import LocalProxy
 
 from udata.core.dataservices.models import Dataservice
 from udata.core.dataset.models import Dataset
+from udata.core.metrics.helpers import get_metrics_for_model, get_stock_metrics
 from udata.core.organization.models import Organization
 from udata.core.reuse.models import Reuse
 from udata.models import WithMetrics, db

@@ -36,15 +37,23 @@ class Site(WithMetrics, db.Document):
         "max_org_reuses",
         "max_org_datasets",
         "datasets",
+        "datasets_visits_by_months",
         "discussions",
         "followers",
         "organizations",
         "public-service",
         "resources",
+        "resources_downloads_by_months",
         "reuses",
         "dataservices",
         "users",
         "harvesters",
+        "users_by_months",
+        "datasets_by_months",
+        "harvesters_by_months",
+        "reuses_by_months",
+        "organizations_by_months",
+        "discussions_by_months",
     ]
 
     def __str__(self):

@@ -72,6 +81,9 @@ class Site(WithMetrics, db.Document):
         from udata.models import Dataset
 
         self.metrics["datasets"] = Dataset.objects.visible().count()
+        self.metrics["datasets_visits_by_months"] = get_metrics_for_model(
+            "site", None, ["visit_dataset"]
+        )[0]
         self.save()
 
     def count_resources(self):

@@ -83,6 +95,9 @@ class Site(WithMetrics, db.Document):
             ),
             {},
         ).get("count", 0)
+        self.metrics["resources_downloads_by_months"] = get_metrics_for_model(
+            "site", None, ["download_resource"]
+        )[0]
         self.save()
 
     def count_reuses(self):

@@ -172,6 +187,24 @@ class Site(WithMetrics, db.Document):
         self.metrics["max_org_datasets"] = org.metrics["datasets"] if org else 0
         self.save()
 
+    def count_stock_metrics(self):
+        from udata.harvest.models import HarvestSource
+        from udata.models import Discussion, User
+
+        self.metrics["users_by_months"] = get_stock_metrics(User.objects())
+        self.metrics["datasets_by_months"] = get_stock_metrics(
+            Dataset.objects().visible(), date_label="created_at_internal"
+        )
+        self.metrics["harvesters_by_months"] = get_stock_metrics(HarvestSource.objects())
+        self.metrics["reuses_by_months"] = get_stock_metrics(Reuse.objects().visible())
+        self.metrics["organizations_by_months"] = get_stock_metrics(
+            Organization.objects().visible()
+        )
+        self.metrics["discussions_by_months"] = get_stock_metrics(
+            Discussion.objects(), date_label="created"
+        )
+        self.save()
+
 
 def get_current_site():
     if getattr(g, "site", None) is None:
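
After this change, the metrics update (see commands.py and tasks.py above) fills six additional `*_by_months` entries in `site.metrics`. A read-side sketch, hedged: it assumes the `current_site` LocalProxy exposed by this module and that `count_stock_metrics()` has already run:

    # Hypothetical read-side sketch — not part of the diff.
    from udata.core.site.models import current_site

    # Each entry is an OrderedDict keyed by "YYYY-MM" for the last 13 months,
    # e.g. OrderedDict([("2024-06", 120), ..., ("2025-06", 143)])
    users_by_months = current_site.metrics.get("users_by_months", {})
    for month, count in users_by_months.items():
        print(month, count)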
udata/core/user/activities.py CHANGED

@@ -30,11 +30,6 @@ class DiscussActivity(object):
     badge_type = "warning"
 
 
-class UserStarredOrganization(FollowActivity, OrgRelatedActivity, Activity):
-    key = "organization:followed"
-    label = _("followed an organization")
-
-
 class UserFollowedUser(FollowActivity, Activity):
     key = "user:followed"
     label = _("followed a user")
udata/migrations/2025-05-22-purge-duplicate-activities.py ADDED

@@ -0,0 +1,101 @@
+"""
+This migration removes legacy activity fields and purges duplicate activities.
+"""
+
+import logging
+from datetime import datetime, timedelta
+
+from mongoengine.connection import get_db
+
+from udata.core.dataset.activities import UserCreatedDataset, UserDeletedDataset, UserUpdatedDataset
+from udata.core.organization.activities import UserUpdatedOrganization
+from udata.core.reuse.activities import UserCreatedReuse, UserDeletedReuse, UserUpdatedReuse
+from udata.core.user.activities import (
+    UserDiscussedDataset,
+    UserDiscussedReuse,
+    UserFollowedDataset,
+    UserFollowedOrganization,
+    UserFollowedReuse,
+)
+
+log = logging.getLogger(__name__)
+
+
+def migrate(db):
+    # Remove legacy fields (`as_organization`, `kwargs`) from old activities
+    result = get_db().activity.update_many({}, {"$unset": {"as_organization": ""}})
+    log.info(
+        f"Legacy field `as_organization` removed from {result.modified_count} activity objects"
+    )
+
+    result = get_db().activity.update_many({}, {"$unset": {"kwargs": ""}})
+    log.info(f"Legacy field `kwargs` removed from {result.modified_count} activity objects")
+
+    # Clean duplicate activities in case of discussion or following:
+    # - remove the "updated" activity on the discussed/followed object
+    # - remove the activity on the organization
+    # The heuristic is to look for specific activities by the same actor on the targeted object
+    # within a -1/+1 second timespan
+    for action_related_activity, object_updated_activity in [
+        (UserDiscussedDataset, UserUpdatedDataset),
+        (UserDiscussedReuse, UserUpdatedReuse),
+        (UserFollowedDataset, UserUpdatedDataset),
+        (UserFollowedReuse, UserUpdatedReuse),
+    ]:
+        org_activity_count = 0
+        object_activity_count = 0
+        activities = (
+            action_related_activity.objects()
+            .no_dereference()  # no_dereference prevents DBRef DoesNotExist errors
+            .no_cache()
+            .timeout(False)
+        )
+        log.info(
+            f"{datetime.utcnow()}: Processing {activities.count()} {action_related_activity} activities..."
+        )
+        for act in activities:
+            object_activity_count += object_updated_activity.objects(
+                actor=act.actor.id,
+                related_to=act.related_to.id,
+                created_at__gte=act.created_at - timedelta(seconds=1),
+                created_at__lte=act.created_at + timedelta(seconds=1),
+            ).delete()
+            if act.organization:
+                org_activity_count += UserUpdatedOrganization.objects(
+                    actor=act.actor.id,
+                    related_to=act.organization,
+                    created_at__gte=act.created_at - timedelta(seconds=1),
+                    created_at__lte=act.created_at + timedelta(seconds=1),
+                ).delete()
+        log.info(
+            f"{datetime.utcnow()}: Deleted {object_activity_count} {object_updated_activity} and {org_activity_count} UserUpdatedOrganization activities"
+        )
+
+    # Clean duplicated UserUpdatedOrganization activities on the organization for any object-related activity
+    for object_related_activity in [
+        UserCreatedDataset,
+        UserUpdatedDataset,
+        UserDeletedDataset,
+        UserCreatedReuse,
+        UserUpdatedReuse,
+        UserDeletedReuse,
+        UserFollowedOrganization,
+    ]:
+        count = 0
+        activities = (
+            object_related_activity.objects(organization__exists=True)
+            .no_dereference()  # no_dereference prevents DBRef DoesNotExist errors
+            .no_cache()
+            .timeout(False)
+        )
+        log.info(
+            f"{datetime.utcnow()}: Processing {activities.count()} {object_related_activity} activities..."
+        )
+        for act in activities:
+            count += UserUpdatedOrganization.objects(
+                actor=act.actor.id,
+                related_to=act.organization,
+                created_at__gte=act.created_at - timedelta(seconds=1),
+                created_at__lte=act.created_at + timedelta(seconds=1),
+            ).delete()
+        log.info(f"{datetime.utcnow()}: Deleted {count} UserUpdatedOrganization activities")
udata/settings.py CHANGED

@@ -574,6 +574,10 @@ class Defaults(object):
     ###########################################################################
     MAX_RESOURCES_IN_JSON_LD = 20
 
+    # Metrics settings
+    ###########################################################################
+    METRICS_API = None
+
 
 class Testing(object):
     """Sane values for testing. Should be applied as override"""