udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30454__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (413)
  1. tasks/__init__.py +109 -107
  2. tasks/helpers.py +18 -18
  3. udata/__init__.py +4 -4
  4. udata/admin/views.py +5 -5
  5. udata/api/__init__.py +111 -134
  6. udata/api/commands.py +45 -37
  7. udata/api/errors.py +5 -4
  8. udata/api/fields.py +23 -21
  9. udata/api/oauth2.py +55 -74
  10. udata/api/parsers.py +15 -15
  11. udata/api/signals.py +1 -1
  12. udata/api_fields.py +137 -89
  13. udata/app.py +58 -55
  14. udata/assets.py +5 -5
  15. udata/auth/__init__.py +37 -26
  16. udata/auth/forms.py +23 -15
  17. udata/auth/helpers.py +1 -1
  18. udata/auth/mails.py +3 -3
  19. udata/auth/password_validation.py +19 -15
  20. udata/auth/views.py +94 -68
  21. udata/commands/__init__.py +71 -69
  22. udata/commands/cache.py +7 -7
  23. udata/commands/db.py +201 -140
  24. udata/commands/dcat.py +36 -30
  25. udata/commands/fixtures.py +100 -84
  26. udata/commands/images.py +21 -20
  27. udata/commands/info.py +17 -20
  28. udata/commands/init.py +10 -10
  29. udata/commands/purge.py +12 -13
  30. udata/commands/serve.py +41 -29
  31. udata/commands/static.py +16 -18
  32. udata/commands/test.py +20 -20
  33. udata/commands/tests/fixtures.py +26 -24
  34. udata/commands/worker.py +31 -33
  35. udata/core/__init__.py +12 -12
  36. udata/core/activity/__init__.py +0 -1
  37. udata/core/activity/api.py +59 -49
  38. udata/core/activity/models.py +28 -26
  39. udata/core/activity/signals.py +1 -1
  40. udata/core/activity/tasks.py +16 -10
  41. udata/core/badges/api.py +6 -6
  42. udata/core/badges/commands.py +14 -13
  43. udata/core/badges/fields.py +8 -5
  44. udata/core/badges/forms.py +7 -4
  45. udata/core/badges/models.py +16 -31
  46. udata/core/badges/permissions.py +1 -3
  47. udata/core/badges/signals.py +2 -2
  48. udata/core/badges/tasks.py +3 -2
  49. udata/core/badges/tests/test_commands.py +10 -10
  50. udata/core/badges/tests/test_model.py +24 -31
  51. udata/core/contact_point/api.py +19 -18
  52. udata/core/contact_point/api_fields.py +21 -14
  53. udata/core/contact_point/factories.py +2 -2
  54. udata/core/contact_point/forms.py +7 -6
  55. udata/core/contact_point/models.py +3 -5
  56. udata/core/dataservices/api.py +26 -21
  57. udata/core/dataservices/factories.py +13 -11
  58. udata/core/dataservices/models.py +35 -40
  59. udata/core/dataservices/permissions.py +4 -4
  60. udata/core/dataservices/rdf.py +40 -17
  61. udata/core/dataservices/tasks.py +4 -3
  62. udata/core/dataset/actions.py +10 -10
  63. udata/core/dataset/activities.py +21 -23
  64. udata/core/dataset/api.py +321 -298
  65. udata/core/dataset/api_fields.py +443 -271
  66. udata/core/dataset/apiv2.py +305 -229
  67. udata/core/dataset/commands.py +38 -36
  68. udata/core/dataset/constants.py +61 -54
  69. udata/core/dataset/csv.py +70 -74
  70. udata/core/dataset/events.py +39 -32
  71. udata/core/dataset/exceptions.py +8 -4
  72. udata/core/dataset/factories.py +57 -65
  73. udata/core/dataset/forms.py +87 -63
  74. udata/core/dataset/models.py +336 -280
  75. udata/core/dataset/permissions.py +9 -6
  76. udata/core/dataset/preview.py +15 -17
  77. udata/core/dataset/rdf.py +156 -122
  78. udata/core/dataset/search.py +92 -77
  79. udata/core/dataset/signals.py +1 -1
  80. udata/core/dataset/tasks.py +63 -54
  81. udata/core/discussions/actions.py +5 -5
  82. udata/core/discussions/api.py +124 -120
  83. udata/core/discussions/factories.py +2 -2
  84. udata/core/discussions/forms.py +9 -7
  85. udata/core/discussions/metrics.py +1 -3
  86. udata/core/discussions/models.py +25 -24
  87. udata/core/discussions/notifications.py +18 -14
  88. udata/core/discussions/permissions.py +3 -3
  89. udata/core/discussions/signals.py +4 -4
  90. udata/core/discussions/tasks.py +24 -28
  91. udata/core/followers/api.py +32 -33
  92. udata/core/followers/models.py +9 -9
  93. udata/core/followers/signals.py +3 -3
  94. udata/core/jobs/actions.py +7 -7
  95. udata/core/jobs/api.py +99 -92
  96. udata/core/jobs/commands.py +48 -49
  97. udata/core/jobs/forms.py +11 -11
  98. udata/core/jobs/models.py +6 -6
  99. udata/core/metrics/__init__.py +2 -2
  100. udata/core/metrics/commands.py +34 -30
  101. udata/core/metrics/models.py +2 -4
  102. udata/core/metrics/signals.py +1 -1
  103. udata/core/metrics/tasks.py +3 -3
  104. udata/core/organization/activities.py +12 -15
  105. udata/core/organization/api.py +167 -174
  106. udata/core/organization/api_fields.py +183 -124
  107. udata/core/organization/apiv2.py +32 -32
  108. udata/core/organization/commands.py +20 -22
  109. udata/core/organization/constants.py +11 -11
  110. udata/core/organization/csv.py +17 -15
  111. udata/core/organization/factories.py +8 -11
  112. udata/core/organization/forms.py +32 -26
  113. udata/core/organization/metrics.py +2 -1
  114. udata/core/organization/models.py +87 -67
  115. udata/core/organization/notifications.py +18 -14
  116. udata/core/organization/permissions.py +10 -11
  117. udata/core/organization/rdf.py +14 -14
  118. udata/core/organization/search.py +30 -28
  119. udata/core/organization/signals.py +7 -7
  120. udata/core/organization/tasks.py +42 -61
  121. udata/core/owned.py +38 -27
  122. udata/core/post/api.py +82 -81
  123. udata/core/post/constants.py +8 -5
  124. udata/core/post/factories.py +4 -4
  125. udata/core/post/forms.py +13 -14
  126. udata/core/post/models.py +20 -22
  127. udata/core/post/tests/test_api.py +30 -32
  128. udata/core/reports/api.py +8 -7
  129. udata/core/reports/constants.py +1 -3
  130. udata/core/reports/models.py +10 -10
  131. udata/core/reuse/activities.py +15 -19
  132. udata/core/reuse/api.py +123 -126
  133. udata/core/reuse/api_fields.py +120 -85
  134. udata/core/reuse/apiv2.py +11 -10
  135. udata/core/reuse/constants.py +23 -23
  136. udata/core/reuse/csv.py +18 -18
  137. udata/core/reuse/factories.py +5 -9
  138. udata/core/reuse/forms.py +24 -21
  139. udata/core/reuse/models.py +55 -51
  140. udata/core/reuse/permissions.py +2 -2
  141. udata/core/reuse/search.py +49 -46
  142. udata/core/reuse/signals.py +1 -1
  143. udata/core/reuse/tasks.py +4 -5
  144. udata/core/site/api.py +47 -50
  145. udata/core/site/factories.py +2 -2
  146. udata/core/site/forms.py +4 -5
  147. udata/core/site/models.py +94 -63
  148. udata/core/site/rdf.py +14 -14
  149. udata/core/spam/api.py +16 -9
  150. udata/core/spam/constants.py +4 -4
  151. udata/core/spam/fields.py +13 -7
  152. udata/core/spam/models.py +27 -20
  153. udata/core/spam/signals.py +1 -1
  154. udata/core/spam/tests/test_spam.py +6 -5
  155. udata/core/spatial/api.py +72 -80
  156. udata/core/spatial/api_fields.py +73 -58
  157. udata/core/spatial/commands.py +67 -64
  158. udata/core/spatial/constants.py +3 -3
  159. udata/core/spatial/factories.py +37 -54
  160. udata/core/spatial/forms.py +27 -26
  161. udata/core/spatial/geoids.py +17 -17
  162. udata/core/spatial/models.py +43 -47
  163. udata/core/spatial/tasks.py +2 -1
  164. udata/core/spatial/tests/test_api.py +115 -130
  165. udata/core/spatial/tests/test_fields.py +74 -77
  166. udata/core/spatial/tests/test_geoid.py +22 -22
  167. udata/core/spatial/tests/test_models.py +5 -7
  168. udata/core/spatial/translations.py +16 -16
  169. udata/core/storages/__init__.py +16 -18
  170. udata/core/storages/api.py +66 -64
  171. udata/core/storages/tasks.py +7 -7
  172. udata/core/storages/utils.py +15 -15
  173. udata/core/storages/views.py +5 -6
  174. udata/core/tags/api.py +17 -14
  175. udata/core/tags/csv.py +4 -4
  176. udata/core/tags/models.py +8 -5
  177. udata/core/tags/tasks.py +11 -13
  178. udata/core/tags/views.py +4 -4
  179. udata/core/topic/api.py +84 -73
  180. udata/core/topic/apiv2.py +157 -127
  181. udata/core/topic/factories.py +3 -4
  182. udata/core/topic/forms.py +12 -14
  183. udata/core/topic/models.py +14 -19
  184. udata/core/topic/parsers.py +26 -26
  185. udata/core/user/activities.py +30 -29
  186. udata/core/user/api.py +151 -152
  187. udata/core/user/api_fields.py +132 -100
  188. udata/core/user/apiv2.py +7 -7
  189. udata/core/user/commands.py +38 -38
  190. udata/core/user/factories.py +8 -9
  191. udata/core/user/forms.py +14 -11
  192. udata/core/user/metrics.py +2 -2
  193. udata/core/user/models.py +68 -69
  194. udata/core/user/permissions.py +4 -5
  195. udata/core/user/rdf.py +7 -8
  196. udata/core/user/tasks.py +2 -2
  197. udata/core/user/tests/test_user_model.py +24 -16
  198. udata/cors.py +99 -0
  199. udata/db/tasks.py +2 -1
  200. udata/entrypoints.py +35 -31
  201. udata/errors.py +2 -1
  202. udata/event/values.py +6 -6
  203. udata/factories.py +2 -2
  204. udata/features/identicon/api.py +5 -6
  205. udata/features/identicon/backends.py +48 -55
  206. udata/features/identicon/tests/test_backends.py +4 -5
  207. udata/features/notifications/__init__.py +0 -1
  208. udata/features/notifications/actions.py +9 -9
  209. udata/features/notifications/api.py +17 -13
  210. udata/features/territories/__init__.py +12 -10
  211. udata/features/territories/api.py +14 -15
  212. udata/features/territories/models.py +23 -28
  213. udata/features/transfer/actions.py +8 -11
  214. udata/features/transfer/api.py +84 -77
  215. udata/features/transfer/factories.py +2 -1
  216. udata/features/transfer/models.py +11 -12
  217. udata/features/transfer/notifications.py +19 -15
  218. udata/features/transfer/permissions.py +5 -5
  219. udata/forms/__init__.py +5 -2
  220. udata/forms/fields.py +164 -172
  221. udata/forms/validators.py +19 -22
  222. udata/forms/widgets.py +9 -13
  223. udata/frontend/__init__.py +31 -26
  224. udata/frontend/csv.py +68 -58
  225. udata/frontend/markdown.py +40 -44
  226. udata/harvest/actions.py +89 -77
  227. udata/harvest/api.py +294 -238
  228. udata/harvest/backends/__init__.py +4 -4
  229. udata/harvest/backends/base.py +128 -111
  230. udata/harvest/backends/dcat.py +80 -66
  231. udata/harvest/commands.py +56 -60
  232. udata/harvest/csv.py +8 -8
  233. udata/harvest/exceptions.py +6 -3
  234. udata/harvest/filters.py +24 -23
  235. udata/harvest/forms.py +27 -28
  236. udata/harvest/models.py +88 -80
  237. udata/harvest/notifications.py +15 -10
  238. udata/harvest/signals.py +13 -13
  239. udata/harvest/tasks.py +11 -10
  240. udata/harvest/tests/factories.py +23 -24
  241. udata/harvest/tests/test_actions.py +136 -166
  242. udata/harvest/tests/test_api.py +220 -214
  243. udata/harvest/tests/test_base_backend.py +117 -112
  244. udata/harvest/tests/test_dcat_backend.py +380 -308
  245. udata/harvest/tests/test_filters.py +33 -22
  246. udata/harvest/tests/test_models.py +11 -14
  247. udata/harvest/tests/test_notifications.py +6 -7
  248. udata/harvest/tests/test_tasks.py +7 -6
  249. udata/i18n.py +237 -78
  250. udata/linkchecker/backends.py +5 -11
  251. udata/linkchecker/checker.py +23 -22
  252. udata/linkchecker/commands.py +4 -6
  253. udata/linkchecker/models.py +6 -6
  254. udata/linkchecker/tasks.py +18 -20
  255. udata/mail.py +21 -21
  256. udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
  257. udata/migrations/2020-08-24-add-fs-filename.py +9 -8
  258. udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
  259. udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
  260. udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
  261. udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
  262. udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
  263. udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
  264. udata/migrations/2021-08-17-follow-integrity.py +5 -4
  265. udata/migrations/2021-08-17-harvest-integrity.py +13 -12
  266. udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
  267. udata/migrations/2021-08-17-transfer-integrity.py +5 -4
  268. udata/migrations/2021-08-17-users-integrity.py +9 -8
  269. udata/migrations/2021-12-14-reuse-topics.py +7 -6
  270. udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
  271. udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
  272. udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
  273. udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
  274. udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
  275. udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
  276. udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
  277. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
  278. udata/migrations/__init__.py +123 -105
  279. udata/models/__init__.py +4 -4
  280. udata/mongo/__init__.py +13 -11
  281. udata/mongo/badges_field.py +3 -2
  282. udata/mongo/datetime_fields.py +13 -12
  283. udata/mongo/document.py +17 -16
  284. udata/mongo/engine.py +15 -16
  285. udata/mongo/errors.py +2 -1
  286. udata/mongo/extras_fields.py +30 -20
  287. udata/mongo/queryset.py +12 -12
  288. udata/mongo/slug_fields.py +38 -28
  289. udata/mongo/taglist_field.py +1 -2
  290. udata/mongo/url_field.py +5 -5
  291. udata/mongo/uuid_fields.py +4 -3
  292. udata/notifications/__init__.py +1 -1
  293. udata/notifications/mattermost.py +10 -9
  294. udata/rdf.py +167 -188
  295. udata/routing.py +40 -45
  296. udata/search/__init__.py +18 -19
  297. udata/search/adapter.py +17 -16
  298. udata/search/commands.py +44 -51
  299. udata/search/fields.py +13 -20
  300. udata/search/query.py +23 -18
  301. udata/search/result.py +9 -10
  302. udata/sentry.py +21 -19
  303. udata/settings.py +262 -198
  304. udata/sitemap.py +8 -6
  305. udata/storage/s3.py +20 -13
  306. udata/tags.py +4 -5
  307. udata/tasks.py +43 -42
  308. udata/tests/__init__.py +9 -6
  309. udata/tests/api/__init__.py +8 -6
  310. udata/tests/api/test_auth_api.py +395 -321
  311. udata/tests/api/test_base_api.py +33 -35
  312. udata/tests/api/test_contact_points.py +7 -9
  313. udata/tests/api/test_dataservices_api.py +211 -158
  314. udata/tests/api/test_datasets_api.py +823 -812
  315. udata/tests/api/test_follow_api.py +13 -15
  316. udata/tests/api/test_me_api.py +95 -112
  317. udata/tests/api/test_organizations_api.py +301 -339
  318. udata/tests/api/test_reports_api.py +35 -25
  319. udata/tests/api/test_reuses_api.py +134 -139
  320. udata/tests/api/test_swagger.py +5 -5
  321. udata/tests/api/test_tags_api.py +18 -25
  322. udata/tests/api/test_topics_api.py +94 -94
  323. udata/tests/api/test_transfer_api.py +53 -48
  324. udata/tests/api/test_user_api.py +128 -141
  325. udata/tests/apiv2/test_datasets.py +290 -198
  326. udata/tests/apiv2/test_me_api.py +10 -11
  327. udata/tests/apiv2/test_organizations.py +56 -74
  328. udata/tests/apiv2/test_swagger.py +5 -5
  329. udata/tests/apiv2/test_topics.py +69 -87
  330. udata/tests/cli/test_cli_base.py +8 -8
  331. udata/tests/cli/test_db_cli.py +21 -19
  332. udata/tests/dataservice/test_dataservice_tasks.py +8 -12
  333. udata/tests/dataset/test_csv_adapter.py +44 -35
  334. udata/tests/dataset/test_dataset_actions.py +2 -3
  335. udata/tests/dataset/test_dataset_commands.py +7 -8
  336. udata/tests/dataset/test_dataset_events.py +36 -29
  337. udata/tests/dataset/test_dataset_model.py +224 -217
  338. udata/tests/dataset/test_dataset_rdf.py +142 -131
  339. udata/tests/dataset/test_dataset_tasks.py +15 -15
  340. udata/tests/dataset/test_resource_preview.py +10 -13
  341. udata/tests/features/territories/__init__.py +9 -13
  342. udata/tests/features/territories/test_territories_api.py +71 -91
  343. udata/tests/forms/test_basic_fields.py +7 -7
  344. udata/tests/forms/test_current_user_field.py +39 -66
  345. udata/tests/forms/test_daterange_field.py +31 -39
  346. udata/tests/forms/test_dict_field.py +28 -26
  347. udata/tests/forms/test_extras_fields.py +102 -76
  348. udata/tests/forms/test_form_field.py +8 -8
  349. udata/tests/forms/test_image_field.py +33 -26
  350. udata/tests/forms/test_model_field.py +134 -123
  351. udata/tests/forms/test_model_list_field.py +7 -7
  352. udata/tests/forms/test_nested_model_list_field.py +117 -79
  353. udata/tests/forms/test_publish_as_field.py +36 -65
  354. udata/tests/forms/test_reference_field.py +34 -53
  355. udata/tests/forms/test_user_forms.py +23 -21
  356. udata/tests/forms/test_uuid_field.py +6 -10
  357. udata/tests/frontend/__init__.py +9 -6
  358. udata/tests/frontend/test_auth.py +7 -6
  359. udata/tests/frontend/test_csv.py +81 -96
  360. udata/tests/frontend/test_hooks.py +43 -43
  361. udata/tests/frontend/test_markdown.py +211 -191
  362. udata/tests/helpers.py +32 -37
  363. udata/tests/models.py +2 -2
  364. udata/tests/organization/test_csv_adapter.py +21 -16
  365. udata/tests/organization/test_notifications.py +11 -18
  366. udata/tests/organization/test_organization_model.py +13 -13
  367. udata/tests/organization/test_organization_rdf.py +29 -22
  368. udata/tests/organization/test_organization_tasks.py +16 -17
  369. udata/tests/plugin.py +79 -73
  370. udata/tests/reuse/test_reuse_model.py +21 -21
  371. udata/tests/reuse/test_reuse_task.py +11 -13
  372. udata/tests/search/__init__.py +11 -12
  373. udata/tests/search/test_adapter.py +60 -70
  374. udata/tests/search/test_query.py +16 -16
  375. udata/tests/search/test_results.py +10 -7
  376. udata/tests/site/test_site_api.py +11 -16
  377. udata/tests/site/test_site_metrics.py +20 -30
  378. udata/tests/site/test_site_model.py +4 -5
  379. udata/tests/site/test_site_rdf.py +94 -78
  380. udata/tests/test_activity.py +17 -17
  381. udata/tests/test_cors.py +62 -0
  382. udata/tests/test_discussions.py +292 -299
  383. udata/tests/test_i18n.py +37 -40
  384. udata/tests/test_linkchecker.py +91 -85
  385. udata/tests/test_mail.py +13 -17
  386. udata/tests/test_migrations.py +219 -180
  387. udata/tests/test_model.py +164 -157
  388. udata/tests/test_notifications.py +17 -17
  389. udata/tests/test_owned.py +14 -14
  390. udata/tests/test_rdf.py +25 -23
  391. udata/tests/test_routing.py +89 -93
  392. udata/tests/test_storages.py +137 -128
  393. udata/tests/test_tags.py +44 -46
  394. udata/tests/test_topics.py +7 -7
  395. udata/tests/test_transfer.py +42 -49
  396. udata/tests/test_uris.py +160 -161
  397. udata/tests/test_utils.py +79 -71
  398. udata/tests/user/test_user_rdf.py +5 -9
  399. udata/tests/workers/test_jobs_commands.py +57 -58
  400. udata/tests/workers/test_tasks_routing.py +23 -29
  401. udata/tests/workers/test_workers_api.py +125 -131
  402. udata/tests/workers/test_workers_helpers.py +6 -6
  403. udata/tracking.py +4 -6
  404. udata/uris.py +45 -46
  405. udata/utils.py +68 -66
  406. udata/wsgi.py +1 -1
  407. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/METADATA +7 -3
  408. udata-9.1.2.dev30454.dist-info/RECORD +706 -0
  409. udata-9.1.2.dev30355.dist-info/RECORD +0 -704
  410. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/LICENSE +0 -0
  411. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/WHEEL +0 -0
  412. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/entry_points.txt +0 -0
  413. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/top_level.txt +0 -0
udata/harvest/backends/__init__.py
@@ -1,8 +1,8 @@
1
- from udata.entrypoints import get_enabled, EntrypointError
1
+ from udata.entrypoints import EntrypointError, get_enabled
2
2
 
3
3
 
4
4
  def get(app, name):
5
- '''Get a backend given its name'''
5
+ """Get a backend given its name"""
6
6
  backend = get_all(app).get(name)
7
7
  if not backend:
8
8
  msg = 'Harvest backend "{0}" is not registered'.format(name)
@@ -11,7 +11,7 @@ def get(app, name):
11
11
 
12
12
 
13
13
  def get_all(app):
14
- return get_enabled('udata.harvesters', app)
14
+ return get_enabled("udata.harvesters", app)
15
15
 
16
16
 
17
- from .base import BaseBackend, HarvestFilter, HarvestFeature # flake8: noqa
17
+ from .base import BaseBackend, HarvestFeature, HarvestFilter # flake8: noqa
udata/harvest/backends/base.py
@@ -1,23 +1,27 @@
1
1
  import logging
2
2
  import traceback
3
-
4
- from datetime import datetime, date, timedelta
3
+ from datetime import date, datetime, timedelta
5
4
  from uuid import UUID
6
5
 
7
6
  import requests
8
-
9
7
  from flask import current_app
10
- from udata.core.dataservices.models import Dataservice
11
8
  from voluptuous import MultipleInvalid, RequiredFieldInvalid
12
9
 
13
- from udata.core.dataset.models import HarvestDatasetMetadata
10
+ from udata.core.dataservices.models import Dataservice
14
11
  from udata.core.dataservices.models import HarvestMetadata as HarvestDataserviceMetadata
12
+ from udata.core.dataset.models import HarvestDatasetMetadata
15
13
  from udata.models import Dataset
16
14
  from udata.utils import safe_unicode
17
15
 
18
16
  from ..exceptions import HarvestException, HarvestSkipException, HarvestValidationError
19
- from ..models import HarvestItem, HarvestJob, HarvestError, HarvestLog, archive_harvested_dataset
20
- from ..signals import before_harvest_job, after_harvest_job
17
+ from ..models import (
18
+ HarvestError,
19
+ HarvestItem,
20
+ HarvestJob,
21
+ HarvestLog,
22
+ archive_harvested_dataset,
23
+ )
24
+ from ..signals import after_harvest_job, before_harvest_job
21
25
 
22
26
  log = logging.getLogger(__name__)
23
27
 
@@ -27,18 +31,18 @@ requests.packages.urllib3.disable_warnings()
27
31
 
28
32
  class HarvestFilter(object):
29
33
  TYPES = {
30
- str: 'string',
31
- bytes: 'string',
32
- int: 'integer',
33
- bool: 'boolean',
34
- UUID: 'uuid',
35
- datetime: 'date-time',
36
- date: 'date',
34
+ str: "string",
35
+ bytes: "string",
36
+ int: "integer",
37
+ bool: "boolean",
38
+ UUID: "uuid",
39
+ datetime: "date-time",
40
+ date: "date",
37
41
  }
38
42
 
39
43
  def __init__(self, label, key, type, description=None):
40
44
  if type not in self.TYPES:
41
- raise TypeError('Unsupported type {0}'.format(type))
45
+ raise TypeError("Unsupported type {0}".format(type))
42
46
  self.label = label
43
47
  self.key = key
44
48
  self.type = type
@@ -46,10 +50,10 @@ class HarvestFilter(object):
46
50
 
47
51
  def as_dict(self):
48
52
  return {
49
- 'label': self.label,
50
- 'key': self.key,
51
- 'type': self.TYPES[self.type],
52
- 'description': self.description,
53
+ "label": self.label,
54
+ "key": self.key,
55
+ "type": self.TYPES[self.type],
56
+ "description": self.description,
53
57
  }
54
58
 
55
59
 
@@ -62,10 +66,10 @@ class HarvestFeature(object):
62
66
 
63
67
  def as_dict(self):
64
68
  return {
65
- 'key': self.key,
66
- 'label': self.label,
67
- 'description': self.description,
68
- 'default': self.default,
69
+ "key": self.key,
70
+ "label": self.label,
71
+ "description": self.description,
72
+ "default": self.default,
69
73
  }
70
74
 
71
75
 
@@ -95,7 +99,7 @@ class BaseBackend(object):
95
99
  self.source = source_or_job
96
100
  self.job = None
97
101
  self.dryrun = dryrun
98
- self.max_items = max_items or current_app.config['HARVEST_MAX_ITEMS']
102
+ self.max_items = max_items or current_app.config["HARVEST_MAX_ITEMS"]
99
103
 
100
104
  @property
101
105
  def config(self):
@@ -103,38 +107,38 @@ class BaseBackend(object):
103
107
 
104
108
  def head(self, url, headers={}, **kwargs):
105
109
  headers.update(self.get_headers())
106
- kwargs['verify'] = kwargs.get('verify', self.verify_ssl)
110
+ kwargs["verify"] = kwargs.get("verify", self.verify_ssl)
107
111
  return requests.head(url, headers=headers, **kwargs)
108
112
 
109
113
  def get(self, url, headers={}, **kwargs):
110
114
  headers.update(self.get_headers())
111
- kwargs['verify'] = kwargs.get('verify', self.verify_ssl)
115
+ kwargs["verify"] = kwargs.get("verify", self.verify_ssl)
112
116
  return requests.get(url, headers=headers, **kwargs)
113
117
 
114
118
  def post(self, url, data, headers={}, **kwargs):
115
119
  headers.update(self.get_headers())
116
- kwargs['verify'] = kwargs.get('verify', self.verify_ssl)
120
+ kwargs["verify"] = kwargs.get("verify", self.verify_ssl)
117
121
  return requests.post(url, data=data, headers=headers, **kwargs)
118
122
 
119
123
  def get_headers(self):
120
124
  return {
121
125
  # TODO: extract site title and version
122
- 'User-Agent': 'uData/0.1 {0.name}'.format(self),
126
+ "User-Agent": "uData/0.1 {0.name}".format(self),
123
127
  }
124
128
 
125
129
  def has_feature(self, key):
126
130
  try:
127
131
  feature = next(f for f in self.features if f.key == key)
128
132
  except StopIteration:
129
- raise HarvestException('Unknown feature {}'.format(key))
130
- return self.config.get('features', {}).get(key, feature.default)
133
+ raise HarvestException("Unknown feature {}".format(key))
134
+ return self.config.get("features", {}).get(key, feature.default)
131
135
 
132
136
  def get_filters(self):
133
- return self.config.get('filters', [])
137
+ return self.config.get("filters", [])
134
138
 
135
139
  def inner_harvest(self):
136
140
  raise NotImplementedError
137
-
141
+
138
142
  def inner_process_dataset(self, item: HarvestItem) -> Dataset:
139
143
  raise NotImplementedError
140
144
 
@@ -142,11 +146,9 @@ class BaseBackend(object):
142
146
  raise NotImplementedError
143
147
 
144
148
  def harvest(self):
145
- log.debug(f'Starting harvesting {self.source.name} ({self.source.url})…')
149
+ log.debug(f"Starting harvesting {self.source.name} ({self.source.url})…")
146
150
  factory = HarvestJob if self.dryrun else HarvestJob.objects.create
147
- self.job = factory(status='initialized',
148
- started=datetime.utcnow(),
149
- source=self.source)
151
+ self.job = factory(status="initialized", started=datetime.utcnow(), source=self.source)
150
152
 
151
153
  before_harvest_job.send(self)
152
154
 
@@ -156,34 +158,38 @@ class BaseBackend(object):
156
158
  if self.source.autoarchive:
157
159
  self.autoarchive()
158
160
 
159
- self.job.status = 'done'
161
+ self.job.status = "done"
160
162
 
161
- if any(i.status == 'failed' for i in self.job.items):
162
- self.job.status += '-errors'
163
+ if any(i.status == "failed" for i in self.job.items):
164
+ self.job.status += "-errors"
163
165
  except HarvestValidationError as e:
164
- log.exception(f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})')
166
+ log.exception(
167
+ f'Harvesting validation failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
168
+ )
165
169
 
166
- self.job.status = 'failed'
170
+ self.job.status = "failed"
167
171
 
168
172
  error = HarvestError(message=safe_unicode(e))
169
173
  self.job.errors.append(error)
170
174
  except Exception as e:
171
- log.exception(f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})')
175
+ log.exception(
176
+ f'Harvesting failed for "{safe_unicode(self.source.name)}" ({self.source.backend})'
177
+ )
172
178
 
173
- self.job.status = 'failed'
179
+ self.job.status = "failed"
174
180
 
175
181
  error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
176
182
  self.job.errors.append(error)
177
183
  finally:
178
184
  self.end_job()
179
-
185
+
180
186
  return self.job
181
187
 
182
188
  def process_dataset(self, remote_id: str, **kwargs):
183
- log.debug(f'Processing dataset {remote_id}…')
189
+ log.debug(f"Processing dataset {remote_id}…")
184
190
 
185
191
  # TODO add `type` to `HarvestItem` to differentiate `Dataset` from `Dataservice`
186
- item = HarvestItem(status='started', started=datetime.utcnow(), remote_id=remote_id)
192
+ item = HarvestItem(status="started", started=datetime.utcnow(), remote_id=remote_id)
187
193
  self.job.items.append(item)
188
194
  self.save_job()
189
195
 
@@ -207,42 +213,45 @@ class BaseBackend(object):
207
213
  else:
208
214
  dataset.save()
209
215
  item.dataset = dataset
210
- item.status = 'done'
216
+ item.status = "done"
211
217
  except HarvestSkipException as e:
212
- item.status = 'skipped'
218
+ item.status = "skipped"
213
219
 
214
- log.info(f'Skipped item {item.remote_id} : {safe_unicode(e)}')
220
+ log.info(f"Skipped item {item.remote_id} : {safe_unicode(e)}")
215
221
  item.errors.append(HarvestError(message=safe_unicode(e)))
216
222
  except HarvestValidationError as e:
217
- item.status = 'failed'
223
+ item.status = "failed"
218
224
 
219
- log.info(f'Error validating item {item.remote_id} : {safe_unicode(e)}')
225
+ log.info(f"Error validating item {item.remote_id} : {safe_unicode(e)}")
220
226
  item.errors.append(HarvestError(message=safe_unicode(e)))
221
227
  except Exception as e:
222
- item.status = 'failed'
223
- log.exception(f'Error while processing {item.remote_id} : {safe_unicode(e)}')
228
+ item.status = "failed"
229
+ log.exception(f"Error while processing {item.remote_id} : {safe_unicode(e)}")
224
230
 
225
231
  error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
226
232
  item.errors.append(error)
227
233
  finally:
228
234
  current_app.logger.removeHandler(log_catcher)
229
235
  item.ended = datetime.utcnow()
230
- item.logs = [HarvestLog(level=record.levelname, message=record.getMessage()) for record in log_catcher.records]
236
+ item.logs = [
237
+ HarvestLog(level=record.levelname, message=record.getMessage())
238
+ for record in log_catcher.records
239
+ ]
231
240
  self.save_job()
232
241
 
233
242
  def is_done(self) -> bool:
234
- '''Should be called after process_dataset to know if we reach the max items'''
243
+ """Should be called after process_dataset to know if we reach the max items"""
235
244
  return self.max_items and len(self.job.items) >= self.max_items
236
245
 
237
- def process_dataservice(self, remote_id: str, **kwargs) -> bool :
238
- '''
246
+ def process_dataservice(self, remote_id: str, **kwargs) -> bool:
247
+ """
239
248
  Return `True` if the parent should stop iterating because we exceed the number
240
249
  of items to process.
241
- '''
242
- log.debug(f'Processing dataservice {remote_id}…')
250
+ """
251
+ log.debug(f"Processing dataservice {remote_id}…")
243
252
 
244
253
  # TODO add `type` to `HarvestItem` to differentiate `Dataset` from `Dataservice`
245
- item = HarvestItem(status='started', started=datetime.utcnow(), remote_id=remote_id)
254
+ item = HarvestItem(status="started", started=datetime.utcnow(), remote_id=remote_id)
246
255
  self.job.items.append(item)
247
256
  self.save_job()
248
257
 
@@ -252,7 +261,9 @@ class BaseBackend(object):
252
261
 
253
262
  dataservice = self.inner_process_dataservice(item, **kwargs)
254
263
 
255
- dataservice.harvest = self.update_dataservice_harvest_info(dataservice.harvest, remote_id)
264
+ dataservice.harvest = self.update_dataservice_harvest_info(
265
+ dataservice.harvest, remote_id
266
+ )
256
267
  dataservice.archived_at = None
257
268
 
258
269
  # TODO: Apply editable mappings
@@ -262,20 +273,20 @@ class BaseBackend(object):
262
273
  else:
263
274
  dataservice.save()
264
275
  item.dataservice = dataservice
265
- item.status = 'done'
276
+ item.status = "done"
266
277
  except HarvestSkipException as e:
267
- item.status = 'skipped'
278
+ item.status = "skipped"
268
279
 
269
- log.info(f'Skipped item {item.remote_id} : {safe_unicode(e)}')
280
+ log.info(f"Skipped item {item.remote_id} : {safe_unicode(e)}")
270
281
  item.errors.append(HarvestError(message=safe_unicode(e)))
271
282
  except HarvestValidationError as e:
272
- item.status = 'failed'
283
+ item.status = "failed"
273
284
 
274
- log.info(f'Error validating item {item.remote_id} : {safe_unicode(e)}')
285
+ log.info(f"Error validating item {item.remote_id} : {safe_unicode(e)}")
275
286
  item.errors.append(HarvestError(message=safe_unicode(e)))
276
287
  except Exception as e:
277
- item.status = 'failed'
278
- log.exception(f'Error while processing {item.remote_id} : {safe_unicode(e)}')
288
+ item.status = "failed"
289
+ log.exception(f"Error while processing {item.remote_id} : {safe_unicode(e)}")
279
290
 
280
291
  error = HarvestError(message=safe_unicode(e), details=traceback.format_exc())
281
292
  item.errors.append(error)
@@ -299,7 +310,9 @@ class BaseBackend(object):
299
310
 
300
311
  return harvest
301
312
 
302
- def update_dataservice_harvest_info(self, harvest: HarvestDataserviceMetadata | None, remote_id: int):
313
+ def update_dataservice_harvest_info(
314
+ self, harvest: HarvestDataserviceMetadata | None, remote_id: int
315
+ ):
303
316
  if not harvest:
304
317
  harvest = HarvestDataserviceMetadata()
305
318
 
@@ -328,45 +341,47 @@ class BaseBackend(object):
328
341
  after_harvest_job.send(self)
329
342
 
330
343
  def autoarchive(self):
331
- '''
344
+ """
332
345
  Archive items that exist on the local instance but not on remote platform
333
346
  after a grace period of HARVEST_AUTOARCHIVE_GRACE_DAYS days.
334
- '''
335
- log.debug('Running autoarchive')
336
- limit_days = current_app.config['HARVEST_AUTOARCHIVE_GRACE_DAYS']
347
+ """
348
+ log.debug("Running autoarchive")
349
+ limit_days = current_app.config["HARVEST_AUTOARCHIVE_GRACE_DAYS"]
337
350
  limit_date = date.today() - timedelta(days=limit_days)
338
- remote_ids = [i.remote_id for i in self.job.items if i.status != 'archived']
351
+ remote_ids = [i.remote_id for i in self.job.items if i.status != "archived"]
339
352
  q = {
340
- 'harvest__source_id': str(self.source.id),
341
- 'harvest__remote_id__nin': remote_ids,
342
- 'harvest__last_update__lt': limit_date
353
+ "harvest__source_id": str(self.source.id),
354
+ "harvest__remote_id__nin": remote_ids,
355
+ "harvest__last_update__lt": limit_date,
343
356
  }
344
357
  local_items_not_on_remote = Dataset.objects.filter(**q)
345
358
 
346
359
  for dataset in local_items_not_on_remote:
347
360
  if not dataset.harvest.archived_at:
348
- archive_harvested_dataset(dataset, reason='not-on-remote', dryrun=self.dryrun)
361
+ archive_harvested_dataset(dataset, reason="not-on-remote", dryrun=self.dryrun)
349
362
  # add a HarvestItem to the job list (useful for report)
350
363
  # even when archiving has already been done (useful for debug)
351
- self.job.items.append(HarvestItem(
352
- remote_id=str(dataset.harvest.remote_id),
353
- dataset=dataset,
354
- status='archived'
355
- ))
364
+ self.job.items.append(
365
+ HarvestItem(
366
+ remote_id=str(dataset.harvest.remote_id), dataset=dataset, status="archived"
367
+ )
368
+ )
356
369
 
357
370
  self.save_job()
358
371
 
359
372
  def get_dataset(self, remote_id):
360
- '''Get or create a dataset given its remote ID (and its source)
373
+ """Get or create a dataset given its remote ID (and its source)
361
374
  We first try to match `source_id` to be source domain independent
362
- '''
363
- dataset = Dataset.objects(__raw__={
364
- 'harvest.remote_id': remote_id,
365
- '$or': [
366
- {'harvest.domain': self.source.domain},
367
- {'harvest.source_id': str(self.source.id)},
368
- ],
369
- }).first()
375
+ """
376
+ dataset = Dataset.objects(
377
+ __raw__={
378
+ "harvest.remote_id": remote_id,
379
+ "$or": [
380
+ {"harvest.domain": self.source.domain},
381
+ {"harvest.source_id": str(self.source.id)},
382
+ ],
383
+ }
384
+ ).first()
370
385
 
371
386
  if dataset:
372
387
  return dataset
@@ -377,18 +392,20 @@ class BaseBackend(object):
377
392
  return Dataset(owner=self.source.owner)
378
393
 
379
394
  return Dataset()
380
-
395
+
381
396
  def get_dataservice(self, remote_id):
382
- '''Get or create a dataservice given its remote ID (and its source)
397
+ """Get or create a dataservice given its remote ID (and its source)
383
398
  We first try to match `source_id` to be source domain independent
384
- '''
385
- dataservice = Dataservice.objects(__raw__={
386
- 'harvest.remote_id': remote_id,
387
- '$or': [
388
- {'harvest.domain': self.source.domain},
389
- {'harvest.source_id': str(self.source.id)},
390
- ],
391
- }).first()
399
+ """
400
+ dataservice = Dataservice.objects(
401
+ __raw__={
402
+ "harvest.remote_id": remote_id,
403
+ "$or": [
404
+ {"harvest.domain": self.source.domain},
405
+ {"harvest.source_id": str(self.source.id)},
406
+ ],
407
+ }
408
+ ).first()
392
409
 
393
410
  if dataservice:
394
411
  return dataservice
@@ -401,18 +418,18 @@ class BaseBackend(object):
401
418
  return Dataservice()
402
419
 
403
420
  def validate(self, data, schema):
404
- '''Perform a data validation against a given schema.
421
+ """Perform a data validation against a given schema.
405
422
 
406
423
  :param data: an object to validate
407
424
  :param schema: a Voluptous schema to validate against
408
- '''
425
+ """
409
426
  try:
410
427
  return schema(data)
411
428
  except MultipleInvalid as ie:
412
429
  errors = []
413
430
  for error in ie.errors:
414
431
  if error.path:
415
- field = '.'.join(str(p) for p in error.path)
432
+ field = ".".join(str(p) for p in error.path)
416
433
  path = error.path
417
434
  value = data
418
435
  while path:
@@ -424,21 +441,21 @@ class BaseBackend(object):
424
441
  except Exception:
425
442
  value = None
426
443
 
427
- txt = safe_unicode(error).replace('for dictionary value', '')
444
+ txt = safe_unicode(error).replace("for dictionary value", "")
428
445
  txt = txt.strip()
429
446
  if isinstance(error, RequiredFieldInvalid):
430
- msg = '[{0}] {1}'
447
+ msg = "[{0}] {1}"
431
448
  else:
432
- msg = '[{0}] {1}: {2}'
449
+ msg = "[{0}] {1}: {2}"
433
450
  try:
434
451
  msg = msg.format(field, txt, str(value))
435
452
  except Exception:
436
- msg = '[{0}] {1}'.format(field, txt)
453
+ msg = "[{0}] {1}".format(field, txt)
437
454
 
438
455
  else:
439
456
  msg = str(error)
440
457
  errors.append(msg)
441
- msg = '\n- '.join(['Validation error:'] + errors)
458
+ msg = "\n- ".join(["Validation error:"] + errors)
442
459
  raise HarvestValidationError(msg)
443
460
 
444
461
 
@@ -450,4 +467,4 @@ class LogCatcher(logging.Handler):
450
467
  super().__init__()
451
468
 
452
469
  def emit(self, record):
453
- self.records.append(record)
470
+ self.records.append(record)