udata 9.1.2.dev30355__py2.py3-none-any.whl → 9.1.2.dev30454__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of udata might be problematic. Click here for more details.

Files changed (413)
  1. tasks/__init__.py +109 -107
  2. tasks/helpers.py +18 -18
  3. udata/__init__.py +4 -4
  4. udata/admin/views.py +5 -5
  5. udata/api/__init__.py +111 -134
  6. udata/api/commands.py +45 -37
  7. udata/api/errors.py +5 -4
  8. udata/api/fields.py +23 -21
  9. udata/api/oauth2.py +55 -74
  10. udata/api/parsers.py +15 -15
  11. udata/api/signals.py +1 -1
  12. udata/api_fields.py +137 -89
  13. udata/app.py +58 -55
  14. udata/assets.py +5 -5
  15. udata/auth/__init__.py +37 -26
  16. udata/auth/forms.py +23 -15
  17. udata/auth/helpers.py +1 -1
  18. udata/auth/mails.py +3 -3
  19. udata/auth/password_validation.py +19 -15
  20. udata/auth/views.py +94 -68
  21. udata/commands/__init__.py +71 -69
  22. udata/commands/cache.py +7 -7
  23. udata/commands/db.py +201 -140
  24. udata/commands/dcat.py +36 -30
  25. udata/commands/fixtures.py +100 -84
  26. udata/commands/images.py +21 -20
  27. udata/commands/info.py +17 -20
  28. udata/commands/init.py +10 -10
  29. udata/commands/purge.py +12 -13
  30. udata/commands/serve.py +41 -29
  31. udata/commands/static.py +16 -18
  32. udata/commands/test.py +20 -20
  33. udata/commands/tests/fixtures.py +26 -24
  34. udata/commands/worker.py +31 -33
  35. udata/core/__init__.py +12 -12
  36. udata/core/activity/__init__.py +0 -1
  37. udata/core/activity/api.py +59 -49
  38. udata/core/activity/models.py +28 -26
  39. udata/core/activity/signals.py +1 -1
  40. udata/core/activity/tasks.py +16 -10
  41. udata/core/badges/api.py +6 -6
  42. udata/core/badges/commands.py +14 -13
  43. udata/core/badges/fields.py +8 -5
  44. udata/core/badges/forms.py +7 -4
  45. udata/core/badges/models.py +16 -31
  46. udata/core/badges/permissions.py +1 -3
  47. udata/core/badges/signals.py +2 -2
  48. udata/core/badges/tasks.py +3 -2
  49. udata/core/badges/tests/test_commands.py +10 -10
  50. udata/core/badges/tests/test_model.py +24 -31
  51. udata/core/contact_point/api.py +19 -18
  52. udata/core/contact_point/api_fields.py +21 -14
  53. udata/core/contact_point/factories.py +2 -2
  54. udata/core/contact_point/forms.py +7 -6
  55. udata/core/contact_point/models.py +3 -5
  56. udata/core/dataservices/api.py +26 -21
  57. udata/core/dataservices/factories.py +13 -11
  58. udata/core/dataservices/models.py +35 -40
  59. udata/core/dataservices/permissions.py +4 -4
  60. udata/core/dataservices/rdf.py +40 -17
  61. udata/core/dataservices/tasks.py +4 -3
  62. udata/core/dataset/actions.py +10 -10
  63. udata/core/dataset/activities.py +21 -23
  64. udata/core/dataset/api.py +321 -298
  65. udata/core/dataset/api_fields.py +443 -271
  66. udata/core/dataset/apiv2.py +305 -229
  67. udata/core/dataset/commands.py +38 -36
  68. udata/core/dataset/constants.py +61 -54
  69. udata/core/dataset/csv.py +70 -74
  70. udata/core/dataset/events.py +39 -32
  71. udata/core/dataset/exceptions.py +8 -4
  72. udata/core/dataset/factories.py +57 -65
  73. udata/core/dataset/forms.py +87 -63
  74. udata/core/dataset/models.py +336 -280
  75. udata/core/dataset/permissions.py +9 -6
  76. udata/core/dataset/preview.py +15 -17
  77. udata/core/dataset/rdf.py +156 -122
  78. udata/core/dataset/search.py +92 -77
  79. udata/core/dataset/signals.py +1 -1
  80. udata/core/dataset/tasks.py +63 -54
  81. udata/core/discussions/actions.py +5 -5
  82. udata/core/discussions/api.py +124 -120
  83. udata/core/discussions/factories.py +2 -2
  84. udata/core/discussions/forms.py +9 -7
  85. udata/core/discussions/metrics.py +1 -3
  86. udata/core/discussions/models.py +25 -24
  87. udata/core/discussions/notifications.py +18 -14
  88. udata/core/discussions/permissions.py +3 -3
  89. udata/core/discussions/signals.py +4 -4
  90. udata/core/discussions/tasks.py +24 -28
  91. udata/core/followers/api.py +32 -33
  92. udata/core/followers/models.py +9 -9
  93. udata/core/followers/signals.py +3 -3
  94. udata/core/jobs/actions.py +7 -7
  95. udata/core/jobs/api.py +99 -92
  96. udata/core/jobs/commands.py +48 -49
  97. udata/core/jobs/forms.py +11 -11
  98. udata/core/jobs/models.py +6 -6
  99. udata/core/metrics/__init__.py +2 -2
  100. udata/core/metrics/commands.py +34 -30
  101. udata/core/metrics/models.py +2 -4
  102. udata/core/metrics/signals.py +1 -1
  103. udata/core/metrics/tasks.py +3 -3
  104. udata/core/organization/activities.py +12 -15
  105. udata/core/organization/api.py +167 -174
  106. udata/core/organization/api_fields.py +183 -124
  107. udata/core/organization/apiv2.py +32 -32
  108. udata/core/organization/commands.py +20 -22
  109. udata/core/organization/constants.py +11 -11
  110. udata/core/organization/csv.py +17 -15
  111. udata/core/organization/factories.py +8 -11
  112. udata/core/organization/forms.py +32 -26
  113. udata/core/organization/metrics.py +2 -1
  114. udata/core/organization/models.py +87 -67
  115. udata/core/organization/notifications.py +18 -14
  116. udata/core/organization/permissions.py +10 -11
  117. udata/core/organization/rdf.py +14 -14
  118. udata/core/organization/search.py +30 -28
  119. udata/core/organization/signals.py +7 -7
  120. udata/core/organization/tasks.py +42 -61
  121. udata/core/owned.py +38 -27
  122. udata/core/post/api.py +82 -81
  123. udata/core/post/constants.py +8 -5
  124. udata/core/post/factories.py +4 -4
  125. udata/core/post/forms.py +13 -14
  126. udata/core/post/models.py +20 -22
  127. udata/core/post/tests/test_api.py +30 -32
  128. udata/core/reports/api.py +8 -7
  129. udata/core/reports/constants.py +1 -3
  130. udata/core/reports/models.py +10 -10
  131. udata/core/reuse/activities.py +15 -19
  132. udata/core/reuse/api.py +123 -126
  133. udata/core/reuse/api_fields.py +120 -85
  134. udata/core/reuse/apiv2.py +11 -10
  135. udata/core/reuse/constants.py +23 -23
  136. udata/core/reuse/csv.py +18 -18
  137. udata/core/reuse/factories.py +5 -9
  138. udata/core/reuse/forms.py +24 -21
  139. udata/core/reuse/models.py +55 -51
  140. udata/core/reuse/permissions.py +2 -2
  141. udata/core/reuse/search.py +49 -46
  142. udata/core/reuse/signals.py +1 -1
  143. udata/core/reuse/tasks.py +4 -5
  144. udata/core/site/api.py +47 -50
  145. udata/core/site/factories.py +2 -2
  146. udata/core/site/forms.py +4 -5
  147. udata/core/site/models.py +94 -63
  148. udata/core/site/rdf.py +14 -14
  149. udata/core/spam/api.py +16 -9
  150. udata/core/spam/constants.py +4 -4
  151. udata/core/spam/fields.py +13 -7
  152. udata/core/spam/models.py +27 -20
  153. udata/core/spam/signals.py +1 -1
  154. udata/core/spam/tests/test_spam.py +6 -5
  155. udata/core/spatial/api.py +72 -80
  156. udata/core/spatial/api_fields.py +73 -58
  157. udata/core/spatial/commands.py +67 -64
  158. udata/core/spatial/constants.py +3 -3
  159. udata/core/spatial/factories.py +37 -54
  160. udata/core/spatial/forms.py +27 -26
  161. udata/core/spatial/geoids.py +17 -17
  162. udata/core/spatial/models.py +43 -47
  163. udata/core/spatial/tasks.py +2 -1
  164. udata/core/spatial/tests/test_api.py +115 -130
  165. udata/core/spatial/tests/test_fields.py +74 -77
  166. udata/core/spatial/tests/test_geoid.py +22 -22
  167. udata/core/spatial/tests/test_models.py +5 -7
  168. udata/core/spatial/translations.py +16 -16
  169. udata/core/storages/__init__.py +16 -18
  170. udata/core/storages/api.py +66 -64
  171. udata/core/storages/tasks.py +7 -7
  172. udata/core/storages/utils.py +15 -15
  173. udata/core/storages/views.py +5 -6
  174. udata/core/tags/api.py +17 -14
  175. udata/core/tags/csv.py +4 -4
  176. udata/core/tags/models.py +8 -5
  177. udata/core/tags/tasks.py +11 -13
  178. udata/core/tags/views.py +4 -4
  179. udata/core/topic/api.py +84 -73
  180. udata/core/topic/apiv2.py +157 -127
  181. udata/core/topic/factories.py +3 -4
  182. udata/core/topic/forms.py +12 -14
  183. udata/core/topic/models.py +14 -19
  184. udata/core/topic/parsers.py +26 -26
  185. udata/core/user/activities.py +30 -29
  186. udata/core/user/api.py +151 -152
  187. udata/core/user/api_fields.py +132 -100
  188. udata/core/user/apiv2.py +7 -7
  189. udata/core/user/commands.py +38 -38
  190. udata/core/user/factories.py +8 -9
  191. udata/core/user/forms.py +14 -11
  192. udata/core/user/metrics.py +2 -2
  193. udata/core/user/models.py +68 -69
  194. udata/core/user/permissions.py +4 -5
  195. udata/core/user/rdf.py +7 -8
  196. udata/core/user/tasks.py +2 -2
  197. udata/core/user/tests/test_user_model.py +24 -16
  198. udata/cors.py +99 -0
  199. udata/db/tasks.py +2 -1
  200. udata/entrypoints.py +35 -31
  201. udata/errors.py +2 -1
  202. udata/event/values.py +6 -6
  203. udata/factories.py +2 -2
  204. udata/features/identicon/api.py +5 -6
  205. udata/features/identicon/backends.py +48 -55
  206. udata/features/identicon/tests/test_backends.py +4 -5
  207. udata/features/notifications/__init__.py +0 -1
  208. udata/features/notifications/actions.py +9 -9
  209. udata/features/notifications/api.py +17 -13
  210. udata/features/territories/__init__.py +12 -10
  211. udata/features/territories/api.py +14 -15
  212. udata/features/territories/models.py +23 -28
  213. udata/features/transfer/actions.py +8 -11
  214. udata/features/transfer/api.py +84 -77
  215. udata/features/transfer/factories.py +2 -1
  216. udata/features/transfer/models.py +11 -12
  217. udata/features/transfer/notifications.py +19 -15
  218. udata/features/transfer/permissions.py +5 -5
  219. udata/forms/__init__.py +5 -2
  220. udata/forms/fields.py +164 -172
  221. udata/forms/validators.py +19 -22
  222. udata/forms/widgets.py +9 -13
  223. udata/frontend/__init__.py +31 -26
  224. udata/frontend/csv.py +68 -58
  225. udata/frontend/markdown.py +40 -44
  226. udata/harvest/actions.py +89 -77
  227. udata/harvest/api.py +294 -238
  228. udata/harvest/backends/__init__.py +4 -4
  229. udata/harvest/backends/base.py +128 -111
  230. udata/harvest/backends/dcat.py +80 -66
  231. udata/harvest/commands.py +56 -60
  232. udata/harvest/csv.py +8 -8
  233. udata/harvest/exceptions.py +6 -3
  234. udata/harvest/filters.py +24 -23
  235. udata/harvest/forms.py +27 -28
  236. udata/harvest/models.py +88 -80
  237. udata/harvest/notifications.py +15 -10
  238. udata/harvest/signals.py +13 -13
  239. udata/harvest/tasks.py +11 -10
  240. udata/harvest/tests/factories.py +23 -24
  241. udata/harvest/tests/test_actions.py +136 -166
  242. udata/harvest/tests/test_api.py +220 -214
  243. udata/harvest/tests/test_base_backend.py +117 -112
  244. udata/harvest/tests/test_dcat_backend.py +380 -308
  245. udata/harvest/tests/test_filters.py +33 -22
  246. udata/harvest/tests/test_models.py +11 -14
  247. udata/harvest/tests/test_notifications.py +6 -7
  248. udata/harvest/tests/test_tasks.py +7 -6
  249. udata/i18n.py +237 -78
  250. udata/linkchecker/backends.py +5 -11
  251. udata/linkchecker/checker.py +23 -22
  252. udata/linkchecker/commands.py +4 -6
  253. udata/linkchecker/models.py +6 -6
  254. udata/linkchecker/tasks.py +18 -20
  255. udata/mail.py +21 -21
  256. udata/migrations/2020-07-24-remove-s-from-scope-oauth.py +9 -8
  257. udata/migrations/2020-08-24-add-fs-filename.py +9 -8
  258. udata/migrations/2020-09-28-update-reuses-datasets-metrics.py +5 -4
  259. udata/migrations/2020-10-16-migrate-ods-resources.py +9 -10
  260. udata/migrations/2021-04-08-update-schema-with-new-structure.py +8 -7
  261. udata/migrations/2021-05-27-fix-default-schema-name.py +7 -6
  262. udata/migrations/2021-07-05-remove-unused-badges.py +17 -15
  263. udata/migrations/2021-07-07-update-schema-for-community-resources.py +7 -6
  264. udata/migrations/2021-08-17-follow-integrity.py +5 -4
  265. udata/migrations/2021-08-17-harvest-integrity.py +13 -12
  266. udata/migrations/2021-08-17-oauth2client-integrity.py +5 -4
  267. udata/migrations/2021-08-17-transfer-integrity.py +5 -4
  268. udata/migrations/2021-08-17-users-integrity.py +9 -8
  269. udata/migrations/2021-12-14-reuse-topics.py +7 -6
  270. udata/migrations/2022-04-21-improve-extension-detection.py +8 -7
  271. udata/migrations/2022-09-22-clean-inactive-harvest-datasets.py +16 -14
  272. udata/migrations/2022-10-10-add-fs_uniquifier-to-user-model.py +6 -6
  273. udata/migrations/2022-10-10-migrate-harvest-extras.py +36 -26
  274. udata/migrations/2023-02-08-rename-internal-dates.py +46 -28
  275. udata/migrations/2024-01-29-fix-reuse-and-dataset-with-private-None.py +10 -8
  276. udata/migrations/2024-03-22-migrate-activity-kwargs-to-extras.py +6 -4
  277. udata/migrations/2024-06-11-fix-reuse-datasets-references.py +7 -6
  278. udata/migrations/__init__.py +123 -105
  279. udata/models/__init__.py +4 -4
  280. udata/mongo/__init__.py +13 -11
  281. udata/mongo/badges_field.py +3 -2
  282. udata/mongo/datetime_fields.py +13 -12
  283. udata/mongo/document.py +17 -16
  284. udata/mongo/engine.py +15 -16
  285. udata/mongo/errors.py +2 -1
  286. udata/mongo/extras_fields.py +30 -20
  287. udata/mongo/queryset.py +12 -12
  288. udata/mongo/slug_fields.py +38 -28
  289. udata/mongo/taglist_field.py +1 -2
  290. udata/mongo/url_field.py +5 -5
  291. udata/mongo/uuid_fields.py +4 -3
  292. udata/notifications/__init__.py +1 -1
  293. udata/notifications/mattermost.py +10 -9
  294. udata/rdf.py +167 -188
  295. udata/routing.py +40 -45
  296. udata/search/__init__.py +18 -19
  297. udata/search/adapter.py +17 -16
  298. udata/search/commands.py +44 -51
  299. udata/search/fields.py +13 -20
  300. udata/search/query.py +23 -18
  301. udata/search/result.py +9 -10
  302. udata/sentry.py +21 -19
  303. udata/settings.py +262 -198
  304. udata/sitemap.py +8 -6
  305. udata/storage/s3.py +20 -13
  306. udata/tags.py +4 -5
  307. udata/tasks.py +43 -42
  308. udata/tests/__init__.py +9 -6
  309. udata/tests/api/__init__.py +8 -6
  310. udata/tests/api/test_auth_api.py +395 -321
  311. udata/tests/api/test_base_api.py +33 -35
  312. udata/tests/api/test_contact_points.py +7 -9
  313. udata/tests/api/test_dataservices_api.py +211 -158
  314. udata/tests/api/test_datasets_api.py +823 -812
  315. udata/tests/api/test_follow_api.py +13 -15
  316. udata/tests/api/test_me_api.py +95 -112
  317. udata/tests/api/test_organizations_api.py +301 -339
  318. udata/tests/api/test_reports_api.py +35 -25
  319. udata/tests/api/test_reuses_api.py +134 -139
  320. udata/tests/api/test_swagger.py +5 -5
  321. udata/tests/api/test_tags_api.py +18 -25
  322. udata/tests/api/test_topics_api.py +94 -94
  323. udata/tests/api/test_transfer_api.py +53 -48
  324. udata/tests/api/test_user_api.py +128 -141
  325. udata/tests/apiv2/test_datasets.py +290 -198
  326. udata/tests/apiv2/test_me_api.py +10 -11
  327. udata/tests/apiv2/test_organizations.py +56 -74
  328. udata/tests/apiv2/test_swagger.py +5 -5
  329. udata/tests/apiv2/test_topics.py +69 -87
  330. udata/tests/cli/test_cli_base.py +8 -8
  331. udata/tests/cli/test_db_cli.py +21 -19
  332. udata/tests/dataservice/test_dataservice_tasks.py +8 -12
  333. udata/tests/dataset/test_csv_adapter.py +44 -35
  334. udata/tests/dataset/test_dataset_actions.py +2 -3
  335. udata/tests/dataset/test_dataset_commands.py +7 -8
  336. udata/tests/dataset/test_dataset_events.py +36 -29
  337. udata/tests/dataset/test_dataset_model.py +224 -217
  338. udata/tests/dataset/test_dataset_rdf.py +142 -131
  339. udata/tests/dataset/test_dataset_tasks.py +15 -15
  340. udata/tests/dataset/test_resource_preview.py +10 -13
  341. udata/tests/features/territories/__init__.py +9 -13
  342. udata/tests/features/territories/test_territories_api.py +71 -91
  343. udata/tests/forms/test_basic_fields.py +7 -7
  344. udata/tests/forms/test_current_user_field.py +39 -66
  345. udata/tests/forms/test_daterange_field.py +31 -39
  346. udata/tests/forms/test_dict_field.py +28 -26
  347. udata/tests/forms/test_extras_fields.py +102 -76
  348. udata/tests/forms/test_form_field.py +8 -8
  349. udata/tests/forms/test_image_field.py +33 -26
  350. udata/tests/forms/test_model_field.py +134 -123
  351. udata/tests/forms/test_model_list_field.py +7 -7
  352. udata/tests/forms/test_nested_model_list_field.py +117 -79
  353. udata/tests/forms/test_publish_as_field.py +36 -65
  354. udata/tests/forms/test_reference_field.py +34 -53
  355. udata/tests/forms/test_user_forms.py +23 -21
  356. udata/tests/forms/test_uuid_field.py +6 -10
  357. udata/tests/frontend/__init__.py +9 -6
  358. udata/tests/frontend/test_auth.py +7 -6
  359. udata/tests/frontend/test_csv.py +81 -96
  360. udata/tests/frontend/test_hooks.py +43 -43
  361. udata/tests/frontend/test_markdown.py +211 -191
  362. udata/tests/helpers.py +32 -37
  363. udata/tests/models.py +2 -2
  364. udata/tests/organization/test_csv_adapter.py +21 -16
  365. udata/tests/organization/test_notifications.py +11 -18
  366. udata/tests/organization/test_organization_model.py +13 -13
  367. udata/tests/organization/test_organization_rdf.py +29 -22
  368. udata/tests/organization/test_organization_tasks.py +16 -17
  369. udata/tests/plugin.py +79 -73
  370. udata/tests/reuse/test_reuse_model.py +21 -21
  371. udata/tests/reuse/test_reuse_task.py +11 -13
  372. udata/tests/search/__init__.py +11 -12
  373. udata/tests/search/test_adapter.py +60 -70
  374. udata/tests/search/test_query.py +16 -16
  375. udata/tests/search/test_results.py +10 -7
  376. udata/tests/site/test_site_api.py +11 -16
  377. udata/tests/site/test_site_metrics.py +20 -30
  378. udata/tests/site/test_site_model.py +4 -5
  379. udata/tests/site/test_site_rdf.py +94 -78
  380. udata/tests/test_activity.py +17 -17
  381. udata/tests/test_cors.py +62 -0
  382. udata/tests/test_discussions.py +292 -299
  383. udata/tests/test_i18n.py +37 -40
  384. udata/tests/test_linkchecker.py +91 -85
  385. udata/tests/test_mail.py +13 -17
  386. udata/tests/test_migrations.py +219 -180
  387. udata/tests/test_model.py +164 -157
  388. udata/tests/test_notifications.py +17 -17
  389. udata/tests/test_owned.py +14 -14
  390. udata/tests/test_rdf.py +25 -23
  391. udata/tests/test_routing.py +89 -93
  392. udata/tests/test_storages.py +137 -128
  393. udata/tests/test_tags.py +44 -46
  394. udata/tests/test_topics.py +7 -7
  395. udata/tests/test_transfer.py +42 -49
  396. udata/tests/test_uris.py +160 -161
  397. udata/tests/test_utils.py +79 -71
  398. udata/tests/user/test_user_rdf.py +5 -9
  399. udata/tests/workers/test_jobs_commands.py +57 -58
  400. udata/tests/workers/test_tasks_routing.py +23 -29
  401. udata/tests/workers/test_workers_api.py +125 -131
  402. udata/tests/workers/test_workers_helpers.py +6 -6
  403. udata/tracking.py +4 -6
  404. udata/uris.py +45 -46
  405. udata/utils.py +68 -66
  406. udata/wsgi.py +1 -1
  407. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/METADATA +7 -3
  408. udata-9.1.2.dev30454.dist-info/RECORD +706 -0
  409. udata-9.1.2.dev30355.dist-info/RECORD +0 -704
  410. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/LICENSE +0 -0
  411. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/WHEEL +0 -0
  412. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/entry_points.txt +0 -0
  413. {udata-9.1.2.dev30355.dist-info → udata-9.1.2.dev30454.dist-info}/top_level.txt +0 -0
@@ -1,32 +1,31 @@
1
- from datetime import date
2
1
  import logging
3
2
  import os
4
3
  import re
5
-
6
- import pytest
4
+ import xml.etree.ElementTree as ET
5
+ from datetime import date
7
6
 
8
7
  import boto3
8
+ import pytest
9
9
  from flask import current_app
10
- import xml.etree.ElementTree as ET
11
10
 
12
11
  from udata.core.dataservices.models import Dataservice
12
+ from udata.core.dataset.factories import LicenseFactory, ResourceSchemaMockData
13
+ from udata.core.organization.factories import OrganizationFactory
13
14
  from udata.harvest.models import HarvestJob
14
15
  from udata.models import Dataset
15
- from udata.core.organization.factories import OrganizationFactory
16
- from udata.core.dataset.factories import LicenseFactory, ResourceSchemaMockData
17
16
  from udata.storage.s3 import get_from_json
18
17
 
19
- from .factories import HarvestSourceFactory
20
- from ..backends.dcat import URIS_TO_REPLACE, CswIso19139DcatBackend
21
18
  from .. import actions
19
+ from ..backends.dcat import URIS_TO_REPLACE, CswIso19139DcatBackend
20
+ from .factories import HarvestSourceFactory
22
21
 
23
22
  log = logging.getLogger(__name__)
24
23
 
25
24
 
26
- TEST_DOMAIN = 'data.test.org' # Need to be used in fixture file
27
- DCAT_URL_PATTERN = 'http://{domain}/{path}'
28
- DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__), 'dcat')
29
- CSW_DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__), 'csw_dcat')
25
+ TEST_DOMAIN = "data.test.org" # Need to be used in fixture file
26
+ DCAT_URL_PATTERN = "http://{domain}/{path}"
27
+ DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__), "dcat")
28
+ CSW_DCAT_FILES_DIR = os.path.join(os.path.dirname(__file__), "csw_dcat")
30
29
 
31
30
 
32
31
  def mock_dcat(rmock, filename, path=None):
@@ -41,7 +40,7 @@ def mock_pagination(rmock, path, pattern):
41
40
  url = DCAT_URL_PATTERN.format(path=path, domain=TEST_DOMAIN)
42
41
 
43
42
  def callback(request, context):
44
- page = request.qs.get('page', [1])[0]
43
+ page = request.qs.get("page", [1])[0]
45
44
  filename = pattern.format(page=page)
46
45
  context.status_code = 200
47
46
  with open(os.path.join(DCAT_FILES_DIR, filename)) as dcatfile:
@@ -56,7 +55,7 @@ def mock_csw_pagination(rmock, path, pattern):
56
55
 
57
56
  def callback(request, context):
58
57
  request_tree = ET.fromstring(request.body)
59
- page = int(request_tree.get('startPosition'))
58
+ page = int(request_tree.get("startPosition"))
60
59
  with open(os.path.join(CSW_DCAT_FILES_DIR, pattern.format(page))) as cswdcatfile:
61
60
  return cswdcatfile.read()
62
61
 
@@ -64,17 +63,14 @@ def mock_csw_pagination(rmock, path, pattern):
64
63
  return url
65
64
 
66
65
 
67
- @pytest.mark.usefixtures('clean_db')
68
- @pytest.mark.options(PLUGINS=['dcat'])
66
+ @pytest.mark.usefixtures("clean_db")
67
+ @pytest.mark.options(PLUGINS=["dcat"])
69
68
  class DcatBackendTest:
70
-
71
69
  def test_simple_flat(self, rmock):
72
- filename = 'flat.jsonld'
70
+ filename = "flat.jsonld"
73
71
  url = mock_dcat(rmock, filename)
74
72
  org = OrganizationFactory()
75
- source = HarvestSourceFactory(backend='dcat',
76
- url=url,
77
- organization=org)
73
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
78
74
 
79
75
  actions.run(source.slug)
80
76
 
@@ -87,17 +83,17 @@ class DcatBackendTest:
87
83
 
88
84
  assert len(datasets) == 3
89
85
 
90
- for i in '1 2 3'.split():
86
+ for i in "1 2 3".split():
91
87
  d = datasets[i]
92
- assert d.title == f'Dataset {i}'
93
- assert d.description == f'Dataset {i} description'
88
+ assert d.title == f"Dataset {i}"
89
+ assert d.description == f"Dataset {i} description"
94
90
  assert d.harvest.remote_id == i
95
- assert d.harvest.backend == 'DCAT'
91
+ assert d.harvest.backend == "DCAT"
96
92
  assert d.harvest.source_id == str(source.id)
97
93
  assert d.harvest.domain == source.domain
98
94
  assert d.harvest.dct_identifier == i
99
- assert d.harvest.remote_url == f'http://data.test.org/datasets/{i}'
100
- assert d.harvest.uri == f'http://data.test.org/datasets/{i}'
95
+ assert d.harvest.remote_url == f"http://data.test.org/datasets/{i}"
96
+ assert d.harvest.uri == f"http://data.test.org/datasets/{i}"
101
97
  assert d.harvest.created_at.date() == date(2016, 12, 14)
102
98
  assert d.harvest.modified_at.date() == date(2016, 12, 14)
103
99
  assert d.harvest.last_update.date() == date.today()
@@ -105,72 +101,72 @@ class DcatBackendTest:
105
101
  assert d.harvest.archived is None
106
102
 
107
103
  # First dataset
108
- dataset = datasets['1']
109
- assert dataset.tags == ['tag-1', 'tag-2', 'tag-3', 'tag-4',
110
- 'theme-1', 'theme-2']
104
+ dataset = datasets["1"]
105
+ assert dataset.tags == ["tag-1", "tag-2", "tag-3", "tag-4", "theme-1", "theme-2"]
111
106
  assert len(dataset.resources) == 2
112
107
 
113
108
  # Second dataset
114
- dataset = datasets['2']
115
- assert dataset.tags == ['tag-1', 'tag-2', 'tag-3']
109
+ dataset = datasets["2"]
110
+ assert dataset.tags == ["tag-1", "tag-2", "tag-3"]
116
111
  assert len(dataset.resources) == 2
117
112
 
118
113
  # Third dataset
119
- dataset = datasets['3']
120
- assert dataset.tags == ['tag-1', 'tag-2']
114
+ dataset = datasets["3"]
115
+ assert dataset.tags == ["tag-1", "tag-2"]
121
116
  assert len(dataset.resources) == 1
122
117
 
123
118
  def test_flat_with_blank_nodes(self, rmock):
124
- filename = 'bnodes.jsonld'
119
+ filename = "bnodes.jsonld"
125
120
  url = mock_dcat(rmock, filename)
126
121
  org = OrganizationFactory()
127
- source = HarvestSourceFactory(backend='dcat',
128
- url=url,
129
- organization=org)
122
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
130
123
 
131
124
  actions.run(source.slug)
132
125
 
133
126
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
134
127
 
135
128
  assert len(datasets) == 3
136
- assert len(datasets['1'].resources) == 2
137
- assert len(datasets['2'].resources) == 2
138
- assert len(datasets['3'].resources) == 1
139
-
140
- assert datasets['1'].resources[0].title == 'Resource 1-1'
141
- assert datasets['1'].resources[0].description == 'A JSON resource'
142
- assert datasets['1'].resources[0].format == 'json'
143
- assert datasets['1'].resources[0].mime == 'application/json'
144
-
145
- @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas', HARVEST_MAX_CATALOG_SIZE_IN_MONGO=None, HARVEST_GRAPHS_S3_BUCKET="test_bucket", S3_URL="https://example.org", S3_ACCESS_KEY_ID="myUser", S3_SECRET_ACCESS_KEY="password")
129
+ assert len(datasets["1"].resources) == 2
130
+ assert len(datasets["2"].resources) == 2
131
+ assert len(datasets["3"].resources) == 1
132
+
133
+ assert datasets["1"].resources[0].title == "Resource 1-1"
134
+ assert datasets["1"].resources[0].description == "A JSON resource"
135
+ assert datasets["1"].resources[0].format == "json"
136
+ assert datasets["1"].resources[0].mime == "application/json"
137
+
138
+ @pytest.mark.options(
139
+ SCHEMA_CATALOG_URL="https://example.com/schemas",
140
+ HARVEST_MAX_CATALOG_SIZE_IN_MONGO=None,
141
+ HARVEST_GRAPHS_S3_BUCKET="test_bucket",
142
+ S3_URL="https://example.org",
143
+ S3_ACCESS_KEY_ID="myUser",
144
+ S3_SECRET_ACCESS_KEY="password",
145
+ )
146
146
  def test_flat_with_blank_nodes_xml(self, rmock):
147
- rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
147
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
148
148
 
149
- filename = 'bnodes.xml'
149
+ filename = "bnodes.xml"
150
150
  url = mock_dcat(rmock, filename)
151
151
  org = OrganizationFactory()
152
- source = HarvestSourceFactory(backend='dcat',
153
- url=url,
154
- organization=org)
152
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
155
153
 
156
154
  actions.run(source.slug)
157
155
 
158
156
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
159
157
 
160
158
  assert len(datasets) == 3
161
- assert len(datasets['3'].resources) == 1
162
- assert len(datasets['1'].resources) == 2
163
- assert len(datasets['2'].resources) == 2
159
+ assert len(datasets["3"].resources) == 1
160
+ assert len(datasets["1"].resources) == 2
161
+ assert len(datasets["2"].resources) == 2
164
162
 
165
163
  def test_harvest_dataservices(self, rmock):
166
- rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
164
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
167
165
 
168
- filename = 'bnodes.xml'
166
+ filename = "bnodes.xml"
169
167
  url = mock_dcat(rmock, filename)
170
168
  org = OrganizationFactory()
171
- source = HarvestSourceFactory(backend='dcat',
172
- url=url,
173
- organization=org)
169
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
174
170
 
175
171
  actions.run(source.slug)
176
172
 
@@ -179,30 +175,57 @@ class DcatBackendTest:
179
175
  assert len(dataservices) == 1
180
176
  assert dataservices[0].title == "Explore API v2"
181
177
  assert dataservices[0].base_api_url == "https://data.paris2024.org/api/explore/v2.1/"
182
- assert dataservices[0].endpoint_description_url == "https://data.paris2024.org/api/explore/v2.1/swagger.json"
183
- assert dataservices[0].harvest.remote_url == "https://data.paris2024.org/api/explore/v2.1/console"
178
+ assert (
179
+ dataservices[0].endpoint_description_url
180
+ == "https://data.paris2024.org/api/explore/v2.1/swagger.json"
181
+ )
182
+ assert (
183
+ dataservices[0].harvest.remote_url
184
+ == "https://data.paris2024.org/api/explore/v2.1/console"
185
+ )
184
186
 
185
187
  def test_harvest_literal_spatial(self, rmock):
186
- url = mock_dcat(rmock, 'evian.json')
188
+ url = mock_dcat(rmock, "evian.json")
187
189
  org = OrganizationFactory()
188
- source = HarvestSourceFactory(backend='dcat',
189
- url=url,
190
- organization=org)
191
-
190
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
191
+
192
192
  actions.run(source.slug)
193
193
 
194
194
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
195
195
  assert len(datasets) == 8
196
- assert datasets['https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5'].spatial is not None
197
- assert datasets['https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[6.5735, 46.3912], [6.6069, 46.3912], [6.6069, 46.4028], [6.5735, 46.4028], [6.5735, 46.3912]]]]}
198
-
196
+ assert (
197
+ datasets[
198
+ "https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5"
199
+ ].spatial
200
+ is not None
201
+ )
202
+ assert datasets[
203
+ "https://www.arcgis.com/home/item.html?id=f6565516d1354383b25793e630cf3f2b&sublayer=5"
204
+ ].spatial.geom == {
205
+ "type": "MultiPolygon",
206
+ "coordinates": [
207
+ [
208
+ [
209
+ [6.5735, 46.3912],
210
+ [6.6069, 46.3912],
211
+ [6.6069, 46.4028],
212
+ [6.5735, 46.4028],
213
+ [6.5735, 46.3912],
214
+ ]
215
+ ]
216
+ ],
217
+ }
199
218
 
200
- @pytest.mark.skip(reason="Mocking S3 requires `moto` which is not available for our current Python 3.7. We can manually test it.")
201
- @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas', HARVEST_JOBS_RETENTION_DAYS=0)
219
+ @pytest.mark.skip(
220
+ reason="Mocking S3 requires `moto` which is not available for our current Python 3.7. We can manually test it."
221
+ )
222
+ @pytest.mark.options(
223
+ SCHEMA_CATALOG_URL="https://example.com/schemas", HARVEST_JOBS_RETENTION_DAYS=0
224
+ )
202
225
  # @mock_s3
203
226
  # @pytest.mark.options(HARVEST_MAX_CATALOG_SIZE_IN_MONGO=15, HARVEST_GRAPHS_S3_BUCKET="test_bucket", S3_URL="https://example.org", S3_ACCESS_KEY_ID="myUser", S3_SECRET_ACCESS_KEY="password")
204
227
  def test_harvest_big_catalog(self, rmock):
205
- rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
228
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
206
229
 
207
230
  # We need to create the bucket since this is all in Moto's 'virtual' AWS account
208
231
  # conn = boto3.resource(
@@ -213,134 +236,153 @@ class DcatBackendTest:
213
236
  # )
214
237
  # conn.create_bucket(Bucket="test_bucket")
215
238
 
216
- filename = 'bnodes.xml'
239
+ filename = "bnodes.xml"
217
240
  url = mock_dcat(rmock, filename)
218
241
  org = OrganizationFactory()
219
- source = HarvestSourceFactory(backend='dcat',
220
- url=url,
221
- organization=org)
242
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
222
243
 
223
244
  actions.run(source.slug)
224
245
 
225
246
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
226
247
 
227
- assert datasets['1'].schema == None
228
- resources_by_title = { resource['title']: resource for resource in datasets['1'].resources }
248
+ assert datasets["1"].schema == None
249
+ resources_by_title = {resource["title"]: resource for resource in datasets["1"].resources}
229
250
 
230
251
  # Schema with wrong version are considered as external. Maybe we could change this in the future
231
- assert resources_by_title['Resource 1-2'].schema.url == 'https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/1337.42.0/schema-statique.json'
232
- assert resources_by_title['Resource 1-2'].schema.name == None
233
- assert resources_by_title['Resource 1-2'].schema.version == None
252
+ assert (
253
+ resources_by_title["Resource 1-2"].schema.url
254
+ == "https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/1337.42.0/schema-statique.json"
255
+ )
256
+ assert resources_by_title["Resource 1-2"].schema.name == None
257
+ assert resources_by_title["Resource 1-2"].schema.version == None
234
258
 
235
- assert datasets['2'].schema.name == 'RGF93 / Lambert-93 (EPSG:2154)'
236
- assert datasets['2'].schema.url == 'http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem'
237
- resources_by_title = { resource['title']: resource for resource in datasets['2'].resources }
259
+ assert datasets["2"].schema.name == "RGF93 / Lambert-93 (EPSG:2154)"
260
+ assert (
261
+ datasets["2"].schema.url
262
+ == "http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem"
263
+ )
264
+ resources_by_title = {resource["title"]: resource for resource in datasets["2"].resources}
238
265
 
239
266
  # Unknown schema are kept as they were provided
240
- assert resources_by_title['Resource 2-1'].schema.name == 'Example Schema'
241
- assert resources_by_title['Resource 2-1'].schema.url == 'https://example.org/schema.json'
242
- assert resources_by_title['Resource 2-1'].schema.version == None
267
+ assert resources_by_title["Resource 2-1"].schema.name == "Example Schema"
268
+ assert resources_by_title["Resource 2-1"].schema.url == "https://example.org/schema.json"
269
+ assert resources_by_title["Resource 2-1"].schema.version == None
243
270
 
244
- assert resources_by_title['Resource 2-2'].schema == None
271
+ assert resources_by_title["Resource 2-2"].schema == None
245
272
 
246
- assert datasets['3'].schema == None
247
- resources_by_title = { resource['title']: resource for resource in datasets['3'].resources }
273
+ assert datasets["3"].schema == None
274
+ resources_by_title = {resource["title"]: resource for resource in datasets["3"].resources}
248
275
 
249
276
  # If there is just the URL, and it matches a known schema inside the catalog, only set the name and the version
250
277
  # (discard the URL)
251
- assert resources_by_title['Resource 3-1'].schema.name == 'etalab/schema-irve-statique'
252
- assert resources_by_title['Resource 3-1'].schema.url == None
253
- assert resources_by_title['Resource 3-1'].schema.version == '2.2.0'
278
+ assert resources_by_title["Resource 3-1"].schema.name == "etalab/schema-irve-statique"
279
+ assert resources_by_title["Resource 3-1"].schema.url == None
280
+ assert resources_by_title["Resource 3-1"].schema.version == "2.2.0"
254
281
 
255
- job = HarvestJob.objects.order_by('-id').first()
282
+ job = HarvestJob.objects.order_by("-id").first()
256
283
 
257
284
  assert job.source.slug == source.slug
258
- assert get_from_json(current_app.config.get('HARVEST_GRAPHS_S3_BUCKET'), job.data['filename']) is not None
285
+ assert (
286
+ get_from_json(current_app.config.get("HARVEST_GRAPHS_S3_BUCKET"), job.data["filename"])
287
+ is not None
288
+ )
259
289
 
260
290
  # Retention is 0 days in config
261
291
  actions.purge_jobs()
262
- assert get_from_json(current_app.config.get('HARVEST_GRAPHS_S3_BUCKET'), job.data['filename']) is None
292
+ assert (
293
+ get_from_json(current_app.config.get("HARVEST_GRAPHS_S3_BUCKET"), job.data["filename"])
294
+ is None
295
+ )
263
296
 
264
- @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas', HARVEST_MAX_ITEMS=2)
297
+ @pytest.mark.options(SCHEMA_CATALOG_URL="https://example.com/schemas", HARVEST_MAX_ITEMS=2)
265
298
  def test_harvest_max_items(self, rmock):
266
- rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
299
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
267
300
 
268
- filename = 'bnodes.xml'
301
+ filename = "bnodes.xml"
269
302
  url = mock_dcat(rmock, filename)
270
303
  org = OrganizationFactory()
271
- source = HarvestSourceFactory(backend='dcat', url=url, organization=org)
304
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
272
305
 
273
306
  actions.run(source.slug)
274
307
 
275
308
  assert Dataset.objects.count() == 2
276
- assert HarvestJob.objects.first().status == 'done'
309
+ assert HarvestJob.objects.first().status == "done"
277
310
 
278
- @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
311
+ @pytest.mark.options(SCHEMA_CATALOG_URL="https://example.com/schemas")
279
312
  def test_harvest_spatial(self, rmock):
280
- rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
313
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
281
314
 
282
- filename = 'bnodes.xml'
315
+ filename = "bnodes.xml"
283
316
  url = mock_dcat(rmock, filename)
284
317
  org = OrganizationFactory()
285
- source = HarvestSourceFactory(backend='dcat', url=url, organization=org)
318
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
286
319
 
287
320
  actions.run(source.slug)
288
321
 
289
322
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
290
323
 
291
- assert datasets['1'].spatial == None
292
- assert datasets['2'].spatial.geom == {'type': 'MultiPolygon', 'coordinates': [[[[-6,51],[10,51],[10,40],[-6,40],[-6,51]]], [[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]], [[[159, -25.], [159, -11], [212, -11], [212, -25.], [159, -25.]]]]}
293
- assert datasets['3'].spatial == None
324
+ assert datasets["1"].spatial == None
325
+ assert datasets["2"].spatial.geom == {
326
+ "type": "MultiPolygon",
327
+ "coordinates": [
328
+ [[[-6, 51], [10, 51], [10, 40], [-6, 40], [-6, 51]]],
329
+ [[[4, 45], [4, 46], [4, 46], [4, 45], [4, 45]]],
330
+ [[[159, -25.0], [159, -11], [212, -11], [212, -25.0], [159, -25.0]]],
331
+ ],
332
+ }
333
+ assert datasets["3"].spatial == None
294
334
 
295
- @pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
335
+ @pytest.mark.options(SCHEMA_CATALOG_URL="https://example.com/schemas")
296
336
  def test_harvest_schemas(self, rmock):
297
- rmock.get('https://example.com/schemas', json=ResourceSchemaMockData.get_mock_data())
337
+ rmock.get("https://example.com/schemas", json=ResourceSchemaMockData.get_mock_data())
298
338
 
299
- filename = 'bnodes.xml'
339
+ filename = "bnodes.xml"
300
340
  url = mock_dcat(rmock, filename)
301
341
  org = OrganizationFactory()
302
- source = HarvestSourceFactory(backend='dcat',
303
- url=url,
304
- organization=org)
342
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
305
343
 
306
344
  actions.run(source.slug)
307
345
 
308
346
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
309
347
 
310
- assert datasets['1'].schema == None
311
- resources_by_title = { resource['title']: resource for resource in datasets['1'].resources }
348
+ assert datasets["1"].schema == None
349
+ resources_by_title = {resource["title"]: resource for resource in datasets["1"].resources}
312
350
 
313
351
  # Schema with wrong version are considered as external. Maybe we could change this in the future
314
- assert resources_by_title['Resource 1-2'].schema.url == 'https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/1337.42.0/schema-statique.json'
315
- assert resources_by_title['Resource 1-2'].schema.name == None
316
- assert resources_by_title['Resource 1-2'].schema.version == None
352
+ assert (
353
+ resources_by_title["Resource 1-2"].schema.url
354
+ == "https://schema.data.gouv.fr/schemas/etalab/schema-irve-statique/1337.42.0/schema-statique.json"
355
+ )
356
+ assert resources_by_title["Resource 1-2"].schema.name == None
357
+ assert resources_by_title["Resource 1-2"].schema.version == None
317
358
 
318
- assert datasets['2'].schema.name == 'RGF93 / Lambert-93 (EPSG:2154)'
319
- assert datasets['2'].schema.url == 'http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem'
320
- resources_by_title = { resource['title']: resource for resource in datasets['2'].resources }
359
+ assert datasets["2"].schema.name == "RGF93 / Lambert-93 (EPSG:2154)"
360
+ assert (
361
+ datasets["2"].schema.url
362
+ == "http://inspire.ec.europa.eu/glossary/SpatialReferenceSystem"
363
+ )
364
+ resources_by_title = {resource["title"]: resource for resource in datasets["2"].resources}
321
365
 
322
366
  # Unknown schema are kept as they were provided
323
- assert resources_by_title['Resource 2-1'].schema.name == 'Example Schema'
324
- assert resources_by_title['Resource 2-1'].schema.url == 'https://example.org/schema.json'
325
- assert resources_by_title['Resource 2-1'].schema.version == None
367
+ assert resources_by_title["Resource 2-1"].schema.name == "Example Schema"
368
+ assert resources_by_title["Resource 2-1"].schema.url == "https://example.org/schema.json"
369
+ assert resources_by_title["Resource 2-1"].schema.version == None
326
370
 
327
- assert resources_by_title['Resource 2-2'].schema == None
371
+ assert resources_by_title["Resource 2-2"].schema == None
328
372
 
329
- assert datasets['3'].schema == None
330
- resources_by_title = { resource['title']: resource for resource in datasets['3'].resources }
373
+ assert datasets["3"].schema == None
374
+ resources_by_title = {resource["title"]: resource for resource in datasets["3"].resources}
331
375
 
332
376
  # If there is just the URL, and it matches a known schema inside the catalog, only set the name and the version
333
377
  # (discard the URL)
334
- assert resources_by_title['Resource 3-1'].schema.name == 'etalab/schema-irve-statique'
335
- assert resources_by_title['Resource 3-1'].schema.url == None
336
- assert resources_by_title['Resource 3-1'].schema.version == '2.2.0'
378
+ assert resources_by_title["Resource 3-1"].schema.name == "etalab/schema-irve-statique"
379
+ assert resources_by_title["Resource 3-1"].schema.url == None
380
+ assert resources_by_title["Resource 3-1"].schema.version == "2.2.0"
337
381
 
338
382
  def test_simple_nested_attributes(self, rmock):
339
- filename = 'nested.jsonld'
383
+ filename = "nested.jsonld"
340
384
  url = mock_dcat(rmock, filename)
341
- source = HarvestSourceFactory(backend='dcat',
342
- url=url,
343
- organization=OrganizationFactory())
385
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=OrganizationFactory())
344
386
 
345
387
  actions.run(source.slug)
346
388
 
@@ -353,23 +395,20 @@ class DcatBackendTest:
353
395
  assert dataset.temporal_coverage is not None
354
396
  assert dataset.temporal_coverage.start == date(2016, 1, 1)
355
397
  assert dataset.temporal_coverage.end == date(2016, 12, 5)
356
- assert dataset.harvest.remote_url == 'http://data.test.org/datasets/1'
398
+ assert dataset.harvest.remote_url == "http://data.test.org/datasets/1"
357
399
 
358
400
  assert len(dataset.resources) == 1
359
401
 
360
402
  resource = dataset.resources[0]
361
403
  assert resource.checksum is not None
362
- assert resource.checksum.type == 'sha1'
363
- assert (resource.checksum.value
364
- == 'fb4106aa286a53be44ec99515f0f0421d4d7ad7d')
404
+ assert resource.checksum.type == "sha1"
405
+ assert resource.checksum.value == "fb4106aa286a53be44ec99515f0f0421d4d7ad7d"
365
406
 
366
407
  def test_idempotence(self, rmock):
367
- filename = 'flat.jsonld'
408
+ filename = "flat.jsonld"
368
409
  url = mock_dcat(rmock, filename)
369
410
  org = OrganizationFactory()
370
- source = HarvestSourceFactory(backend='dcat',
371
- url=url,
372
- organization=org)
411
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
373
412
 
374
413
  # Run the same havester twice
375
414
  actions.run(source.slug)
@@ -378,17 +417,14 @@ class DcatBackendTest:
378
417
  datasets = {d.harvest.dct_identifier: d for d in Dataset.objects}
379
418
 
380
419
  assert len(datasets) == 3
381
- assert len(datasets['1'].resources) == 2
382
- assert len(datasets['2'].resources) == 2
383
- assert len(datasets['3'].resources) == 1
420
+ assert len(datasets["1"].resources) == 2
421
+ assert len(datasets["2"].resources) == 2
422
+ assert len(datasets["3"].resources) == 1
384
423
 
385
424
  def test_hydra_partial_collection_view_pagination(self, rmock):
386
- url = mock_pagination(rmock, 'catalog.jsonld',
387
- 'partial-collection-{page}.jsonld')
425
+ url = mock_pagination(rmock, "catalog.jsonld", "partial-collection-{page}.jsonld")
388
426
  org = OrganizationFactory()
389
- source = HarvestSourceFactory(backend='dcat',
390
- url=url,
391
- organization=org)
427
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
392
428
 
393
429
  actions.run(source.slug)
394
430
 
@@ -398,12 +434,9 @@ class DcatBackendTest:
398
434
  assert len(job.items) == 4
399
435
 
400
436
  def test_hydra_legacy_paged_collection_pagination(self, rmock):
401
- url = mock_pagination(rmock, 'catalog.jsonld',
402
- 'paged-collection-{page}.jsonld')
437
+ url = mock_pagination(rmock, "catalog.jsonld", "paged-collection-{page}.jsonld")
403
438
  org = OrganizationFactory()
404
- source = HarvestSourceFactory(backend='dcat',
405
- url=url,
406
- organization=org)
439
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
407
440
 
408
441
  actions.run(source.slug)
409
442
 
@@ -413,12 +446,10 @@ class DcatBackendTest:
413
446
  assert len(job.items) == 4
414
447
 
415
448
  def test_failure_on_initialize(self, rmock):
416
- url = DCAT_URL_PATTERN.format(path='', domain=TEST_DOMAIN)
417
- rmock.get(url, text='should fail')
449
+ url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
450
+ rmock.get(url, text="should fail")
418
451
  org = OrganizationFactory()
419
- source = HarvestSourceFactory(backend='dcat',
420
- url=url,
421
- organization=org)
452
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
422
453
 
423
454
  actions.run(source.slug)
424
455
 
@@ -426,15 +457,13 @@ class DcatBackendTest:
426
457
 
427
458
  job = source.get_last_job()
428
459
 
429
- assert job.status == 'failed'
460
+ assert job.status == "failed"
430
461
 
431
462
  def test_supported_mime_type(self, rmock):
432
- url = mock_dcat(rmock, 'catalog.xml', path='without/extension')
433
- rmock.head(url, headers={'Content-Type': 'application/xml; charset=utf-8'})
463
+ url = mock_dcat(rmock, "catalog.xml", path="without/extension")
464
+ rmock.head(url, headers={"Content-Type": "application/xml; charset=utf-8"})
434
465
  org = OrganizationFactory()
435
- source = HarvestSourceFactory(backend='dcat',
436
- url=url,
437
- organization=org)
466
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
438
467
 
439
468
  actions.run(source.slug)
440
469
 
@@ -442,144 +471,154 @@ class DcatBackendTest:
442
471
 
443
472
  job = source.get_last_job()
444
473
 
445
- assert job.status == 'done'
474
+ assert job.status == "done"
446
475
  assert job.errors == []
447
476
  assert len(job.items) == 3
448
477
 
449
478
  def test_xml_catalog(self, rmock):
450
- LicenseFactory(id='lov2', title='Licence Ouverte Version 2.0')
479
+ LicenseFactory(id="lov2", title="Licence Ouverte Version 2.0")
451
480
 
452
- url = mock_dcat(rmock, 'catalog.xml', path='catalog.xml')
481
+ url = mock_dcat(rmock, "catalog.xml", path="catalog.xml")
453
482
  org = OrganizationFactory()
454
- source = HarvestSourceFactory(backend='dcat',
455
- url=url,
456
- organization=org)
483
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
457
484
 
458
485
  actions.run(source.slug)
459
486
 
460
487
  # test dct:license support
461
- dataset = Dataset.objects.get(harvest__dct_identifier='3')
462
- assert dataset.license.id == 'lov2'
463
- assert dataset.harvest.remote_url == 'http://data.test.org/datasets/3'
464
- assert dataset.harvest.remote_id == '3'
488
+ dataset = Dataset.objects.get(harvest__dct_identifier="3")
489
+ assert dataset.license.id == "lov2"
490
+ assert dataset.harvest.remote_url == "http://data.test.org/datasets/3"
491
+ assert dataset.harvest.remote_id == "3"
465
492
  assert dataset.harvest.created_at.date() == date(2016, 12, 14)
466
493
  assert dataset.harvest.modified_at.date() == date(2016, 12, 14)
467
- assert dataset.frequency == 'daily'
468
- assert dataset.description == 'Dataset 3 description'
494
+ assert dataset.frequency == "daily"
495
+ assert dataset.description == "Dataset 3 description"
469
496
 
470
497
  assert dataset.temporal_coverage is not None
471
498
  assert dataset.temporal_coverage.start == date(2016, 1, 1)
472
499
  assert dataset.temporal_coverage.end == date(2016, 12, 5)
473
500
 
474
- assert dataset.extras["harvest"]["dct:accessRights"] == "http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/INSPIRE_Directive_Article13_1e"
475
- assert dataset.extras["harvest"]["dct:provenance"] == ["Description de la provenance des données"]
501
+ assert (
502
+ dataset.extras["harvest"]["dct:accessRights"]
503
+ == "http://inspire.ec.europa.eu/metadata-codelist/LimitationsOnPublicAccess/INSPIRE_Directive_Article13_1e"
504
+ )
505
+ assert dataset.extras["harvest"]["dct:provenance"] == [
506
+ "Description de la provenance des données"
507
+ ]
476
508
 
477
- assert 'observation-de-la-terre-et-environnement' in dataset.tags
478
- assert 'hvd' in dataset.tags
509
+ assert "observation-de-la-terre-et-environnement" in dataset.tags
510
+ assert "hvd" in dataset.tags
479
511
 
480
- dataset = Dataset.objects.get(harvest__dct_identifier='1')
512
+ dataset = Dataset.objects.get(harvest__dct_identifier="1")
481
513
  # test html abstract description support
482
- assert dataset.description == '# h1 title\n\n## h2 title\n\n **and bold text**'
514
+ assert dataset.description == "# h1 title\n\n## h2 title\n\n **and bold text**"
483
515
  # test DCAT periodoftime
484
516
  assert dataset.temporal_coverage is not None
485
517
  assert dataset.temporal_coverage.start == date(2016, 1, 1)
486
518
  assert dataset.temporal_coverage.end == date(2016, 12, 5)
487
- assert dataset.contact_point['email'] == 'hello@its.me'
488
- assert dataset.contact_point['name'] == 'Organization contact'
519
+ assert dataset.contact_point["email"] == "hello@its.me"
520
+ assert dataset.contact_point["name"] == "Organization contact"
489
521
  assert dataset.frequency is None
490
522
 
491
523
  assert len(dataset.resources) == 3
492
524
 
493
- resource_1 = next(res for res in dataset.resources if res.title == 'Resource 1-1')
494
- assert resource_1.filetype == 'remote'
525
+ resource_1 = next(res for res in dataset.resources if res.title == "Resource 1-1")
526
+ assert resource_1.filetype == "remote"
495
527
  # Format is a IANA URI
496
- assert resource_1.format == 'json'
497
- assert resource_1.mime == 'application/json'
528
+ assert resource_1.format == "json"
529
+ assert resource_1.mime == "application/json"
498
530
  assert resource_1.filesize == 12323
499
- assert resource_1.description == 'A JSON resource'
500
- assert resource_1.url == 'http://data.test.org/datasets/1/resources/1/file.json'
501
- assert resource_1.type == 'main'
531
+ assert resource_1.description == "A JSON resource"
532
+ assert resource_1.url == "http://data.test.org/datasets/1/resources/1/file.json"
533
+ assert resource_1.type == "main"
502
534
 
503
- resource_2 = next(res for res in dataset.resources if res.title == 'Resource 1-2')
504
- assert resource_2.format == 'json'
505
- assert resource_2.description == 'A JSON resource'
506
- assert resource_2.url == 'http://data.test.org/datasets/1/resources/2/file.json'
507
- assert resource_2.type == 'main'
535
+ resource_2 = next(res for res in dataset.resources if res.title == "Resource 1-2")
536
+ assert resource_2.format == "json"
537
+ assert resource_2.description == "A JSON resource"
538
+ assert resource_2.url == "http://data.test.org/datasets/1/resources/2/file.json"
539
+ assert resource_2.type == "main"
508
540
 
509
541
  # Make sure additionnal resource is correctly harvested
510
- resource_3 = next(res for res in dataset.resources if res.title == 'Resource 1-3')
511
- assert resource_3.format == 'json'
512
- assert resource_3.description == ''
513
- assert resource_3.url == 'http://data.test.org/datasets/1/resources/3'
514
- assert resource_3.type == 'other'
542
+ resource_3 = next(res for res in dataset.resources if res.title == "Resource 1-3")
543
+ assert resource_3.format == "json"
544
+ assert resource_3.description == ""
545
+ assert resource_3.url == "http://data.test.org/datasets/1/resources/3"
546
+ assert resource_3.type == "other"
515
547
 
516
548
  def test_geonetwork_xml_catalog(self, rmock):
517
- url = mock_dcat(rmock, 'geonetwork.xml', path='catalog.xml')
549
+ url = mock_dcat(rmock, "geonetwork.xml", path="catalog.xml")
518
550
  org = OrganizationFactory()
519
- source = HarvestSourceFactory(backend='dcat',
520
- url=url,
521
- organization=org)
551
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
522
552
  actions.run(source.slug)
523
553
  dataset = Dataset.objects.filter(organization=org).first()
524
554
  assert dataset is not None
525
555
  assert dataset.harvest is not None
526
- assert dataset.harvest.remote_id == '0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2' # noqa
527
- assert dataset.harvest.dct_identifier == '0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2' # noqa
556
+ assert (
557
+ dataset.harvest.remote_id
558
+ == "0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2"
559
+ ) # noqa
560
+ assert (
561
+ dataset.harvest.dct_identifier
562
+ == "0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2"
563
+ ) # noqa
528
564
  assert dataset.harvest.created_at.date() == date(2004, 11, 3)
529
565
  assert dataset.harvest.modified_at is None
530
- assert dataset.harvest.uri == 'https://sig.oreme.org/geonetwork/srv/resources/datasets/0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2' # noqa
566
+ assert (
567
+ dataset.harvest.uri
568
+ == "https://sig.oreme.org/geonetwork/srv/resources/datasets/0c456d2d-9548-4a2a-94ef-231d9d890ce2 https://sig.oreme.org/geonetwork/srv/resources0c456d2d-9548-4a2a-94ef-231d9d890ce2"
569
+ ) # noqa
531
570
  assert dataset.harvest.remote_url is None # the uri validation failed
532
- assert dataset.description.startswith('Data of type chemistry')
571
+ assert dataset.description.startswith("Data of type chemistry")
533
572
  assert dataset.temporal_coverage is not None
534
573
  assert dataset.temporal_coverage.start == date(2004, 11, 3)
535
574
  assert dataset.temporal_coverage.end == date(2005, 3, 30)
536
575
 
537
576
  def test_sigoreme_xml_catalog(self, rmock):
538
- LicenseFactory(id='fr-lo', title='Licence ouverte / Open Licence')
539
- url = mock_dcat(rmock, 'sig.oreme.rdf')
577
+ LicenseFactory(id="fr-lo", title="Licence ouverte / Open Licence")
578
+ url = mock_dcat(rmock, "sig.oreme.rdf")
540
579
  org = OrganizationFactory()
541
- source = HarvestSourceFactory(backend='dcat',
542
- url=url,
543
- organization=org)
580
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
544
581
  actions.run(source.slug)
545
582
  dataset = Dataset.objects.filter(organization=org).first()
546
583
 
547
584
  assert dataset is not None
548
- assert dataset.frequency == 'irregular'
549
- assert 'gravi' in dataset.tags # support dcat:keyword
550
- assert 'geodesy' in dataset.tags # support dcat:theme
551
- assert dataset.license.id == 'fr-lo'
585
+ assert dataset.frequency == "irregular"
586
+ assert "gravi" in dataset.tags # support dcat:keyword
587
+ assert "geodesy" in dataset.tags # support dcat:theme
588
+ assert dataset.license.id == "fr-lo"
552
589
  assert len(dataset.resources) == 1
553
590
  assert dataset.description.startswith("Data from the 'National network")
554
591
  assert dataset.harvest is not None
555
- assert dataset.harvest.dct_identifier == '0437a976-cff1-4fa6-807a-c23006df2f8f'
556
- assert dataset.harvest.remote_id == '0437a976-cff1-4fa6-807a-c23006df2f8f'
592
+ assert dataset.harvest.dct_identifier == "0437a976-cff1-4fa6-807a-c23006df2f8f"
593
+ assert dataset.harvest.remote_id == "0437a976-cff1-4fa6-807a-c23006df2f8f"
557
594
  assert dataset.harvest.created_at is None
558
595
  assert dataset.harvest.modified_at is None
559
- assert dataset.harvest.uri == 'https://sig.oreme.org/geonetwork/srv/eng/catalog.search#/metadata//datasets/0437a976-cff1-4fa6-807a-c23006df2f8f' # noqa
560
- assert dataset.harvest.remote_url == 'https://sig.oreme.org/geonetwork/srv/eng/catalog.search#/metadata//datasets/0437a976-cff1-4fa6-807a-c23006df2f8f' # noqa
596
+ assert (
597
+ dataset.harvest.uri
598
+ == "https://sig.oreme.org/geonetwork/srv/eng/catalog.search#/metadata//datasets/0437a976-cff1-4fa6-807a-c23006df2f8f"
599
+ ) # noqa
600
+ assert (
601
+ dataset.harvest.remote_url
602
+ == "https://sig.oreme.org/geonetwork/srv/eng/catalog.search#/metadata//datasets/0437a976-cff1-4fa6-807a-c23006df2f8f"
603
+ ) # noqa
561
604
  assert dataset.harvest.last_update.date() == date.today()
562
605
 
563
606
  def test_user_agent_get(self, rmock):
564
- url = mock_dcat(rmock, 'catalog.xml', path='without/extension')
565
- rmock.head(url, headers={'Content-Type': 'application/xml; charset=utf-8'})
607
+ url = mock_dcat(rmock, "catalog.xml", path="without/extension")
608
+ rmock.head(url, headers={"Content-Type": "application/xml; charset=utf-8"})
566
609
  get_mock = rmock.get(url)
567
610
  org = OrganizationFactory()
568
- source = HarvestSourceFactory(backend='dcat',
569
- url=url,
570
- organization=org)
611
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
571
612
  actions.run(source.slug)
572
613
 
573
- assert 'User-Agent' in get_mock.last_request.headers
574
- assert get_mock.last_request.headers['User-Agent'] == 'uData/0.1 dcat'
614
+ assert "User-Agent" in get_mock.last_request.headers
615
+ assert get_mock.last_request.headers["User-Agent"] == "uData/0.1 dcat"
575
616
 
576
617
  def test_unsupported_mime_type(self, rmock):
577
- url = DCAT_URL_PATTERN.format(path='', domain=TEST_DOMAIN)
578
- rmock.head(url, headers={'Content-Type': 'text/html; charset=utf-8'})
618
+ url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
619
+ rmock.head(url, headers={"Content-Type": "text/html; charset=utf-8"})
579
620
  org = OrganizationFactory()
580
- source = HarvestSourceFactory(backend='dcat',
581
- url=url,
582
- organization=org)
621
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
583
622
 
584
623
  actions.run(source.slug)
585
624
 
@@ -587,19 +626,17 @@ class DcatBackendTest:
587
626
 
588
627
  job = source.get_last_job()
589
628
 
590
- assert job.status == 'failed'
629
+ assert job.status == "failed"
591
630
  assert len(job.errors) == 1
592
631
 
593
632
  error = job.errors[0]
594
633
  assert error.message == 'Unsupported mime type "text/html"'
595
634
 
596
635
  def test_unable_to_detect_format(self, rmock):
597
- url = DCAT_URL_PATTERN.format(path='', domain=TEST_DOMAIN)
598
- rmock.head(url, headers={'Content-Type': ''})
636
+ url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
637
+ rmock.head(url, headers={"Content-Type": ""})
599
638
  org = OrganizationFactory()
600
- source = HarvestSourceFactory(backend='dcat',
601
- url=url,
602
- organization=org)
639
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
603
640
 
604
641
  actions.run(source.slug)
605
642
 
@@ -607,43 +644,46 @@ class DcatBackendTest:
607
644
 
608
645
  job = source.get_last_job()
609
646
 
610
- assert job.status == 'failed'
647
+ assert job.status == "failed"
611
648
  assert len(job.errors) == 1
612
649
 
613
650
  error = job.errors[0]
614
- expected = 'Unable to detect format from extension or mime type'
651
+ expected = "Unable to detect format from extension or mime type"
615
652
  assert error.message == expected
616
653
 
617
654
  def test_use_replaced_uris(self, rmock, mocker):
618
655
  mocker.patch.dict(
619
656
  URIS_TO_REPLACE,
620
- {'http://example.org/this-url-does-not-exist': 'https://json-ld.org/contexts/person.jsonld'}
657
+ {
658
+ "http://example.org/this-url-does-not-exist": "https://json-ld.org/contexts/person.jsonld"
659
+ },
621
660
  )
622
- url = DCAT_URL_PATTERN.format(path='', domain=TEST_DOMAIN)
623
- rmock.get(url, json={
624
- '@context': 'http://example.org/this-url-does-not-exist',
625
- '@type': 'dcat:Catalog',
626
- 'dataset': []
627
- })
628
- rmock.head(url, headers={'Content-Type': 'application/json'})
661
+ url = DCAT_URL_PATTERN.format(path="", domain=TEST_DOMAIN)
662
+ rmock.get(
663
+ url,
664
+ json={
665
+ "@context": "http://example.org/this-url-does-not-exist",
666
+ "@type": "dcat:Catalog",
667
+ "dataset": [],
668
+ },
669
+ )
670
+ rmock.head(url, headers={"Content-Type": "application/json"})
629
671
  org = OrganizationFactory()
630
- source = HarvestSourceFactory(backend='dcat',
631
- url=url,
632
- organization=org)
672
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=org)
633
673
  actions.run(source.slug)
634
674
 
635
675
  source.reload()
636
676
 
637
677
  job = source.get_last_job()
638
678
  assert len(job.items) == 0
639
- assert job.status == 'done'
679
+ assert job.status == "done"
640
680
 
641
681
  def test_target_404(self, rmock):
642
- filename = 'obvious-format.jsonld'
682
+ filename = "obvious-format.jsonld"
643
683
  url = DCAT_URL_PATTERN.format(path=filename, domain=TEST_DOMAIN)
644
684
  rmock.get(url, status_code=404)
645
685
 
646
- source = HarvestSourceFactory(backend='dcat', url=url, organization=OrganizationFactory())
686
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=OrganizationFactory())
647
687
  actions.run(source.slug)
648
688
  source.reload()
649
689
 
@@ -652,11 +692,11 @@ class DcatBackendTest:
652
692
  assert len(job.errors) == 1
653
693
  assert "404 Client Error" in job.errors[0].message
654
694
 
655
- filename = 'need-to-head-to-guess-format'
695
+ filename = "need-to-head-to-guess-format"
656
696
  url = DCAT_URL_PATTERN.format(path=filename, domain=TEST_DOMAIN)
657
697
  rmock.head(url, status_code=404)
658
698
 
659
- source = HarvestSourceFactory(backend='dcat', url=url, organization=OrganizationFactory())
699
+ source = HarvestSourceFactory(backend="dcat", url=url, organization=OrganizationFactory())
660
700
  actions.run(source.slug)
661
701
  source.reload()
662
702
 
@@ -666,16 +706,13 @@ class DcatBackendTest:
666
706
  assert "404 Client Error" in job.errors[0].message
667
707
 
668
708
 
669
- @pytest.mark.usefixtures('clean_db')
670
- @pytest.mark.options(PLUGINS=['csw'])
709
+ @pytest.mark.usefixtures("clean_db")
710
+ @pytest.mark.options(PLUGINS=["csw"])
671
711
  class CswDcatBackendTest:
672
-
673
712
  def test_geonetworkv4(self, rmock):
674
- url = mock_csw_pagination(rmock, 'geonetwork/srv/eng/csw.rdf', 'geonetworkv4-page-{}.xml')
713
+ url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetworkv4-page-{}.xml")
675
714
  org = OrganizationFactory()
676
- source = HarvestSourceFactory(backend='csw-dcat',
677
- url=url,
678
- organization=org)
715
+ source = HarvestSourceFactory(backend="csw-dcat", url=url, organization=org)
679
716
 
680
717
  actions.run(source.slug)
681
718
 
@@ -689,48 +726,55 @@ class CswDcatBackendTest:
689
726
  assert len(datasets) == 6
690
727
 
691
728
  # First dataset
692
- dataset = datasets['https://www.geo2france.fr/2017/accidento']
693
- assert dataset.title == 'Localisation des accidents de la circulation routière en 2017'
694
- assert dataset.description == 'Accidents corporels de la circulation en Hauts de France (2017)'
695
- assert set(dataset.tags) == set([
696
- 'donnee-ouverte', 'accidentologie', 'accident', 'reseaux-de-transport', 'accident-de-la-route',
697
- 'hauts-de-france', 'nord', 'pas-de-calais', 'oise', 'somme', 'aisne'
698
- ])
729
+ dataset = datasets["https://www.geo2france.fr/2017/accidento"]
730
+ assert dataset.title == "Localisation des accidents de la circulation routière en 2017"
731
+ assert (
732
+ dataset.description == "Accidents corporels de la circulation en Hauts de France (2017)"
733
+ )
734
+ assert set(dataset.tags) == set(
735
+ [
736
+ "donnee-ouverte",
737
+ "accidentologie",
738
+ "accident",
739
+ "reseaux-de-transport",
740
+ "accident-de-la-route",
741
+ "hauts-de-france",
742
+ "nord",
743
+ "pas-de-calais",
744
+ "oise",
745
+ "somme",
746
+ "aisne",
747
+ ]
748
+ )
699
749
  assert dataset.harvest.created_at.date() == date(2017, 1, 1)
700
750
  assert len(dataset.resources) == 1
701
751
  resource = dataset.resources[0]
702
- assert resource.title == 'accidento_hdf_L93'
703
- assert resource.url == 'https://www.geo2france.fr/geoserver/cr_hdf/ows'
704
- assert resource.format == 'ogc:wms'
752
+ assert resource.title == "accidento_hdf_L93"
753
+ assert resource.url == "https://www.geo2france.fr/geoserver/cr_hdf/ows"
754
+ assert resource.format == "ogc:wms"
705
755
 
706
756
  def test_user_agent_post(self, rmock):
707
- url = mock_csw_pagination(rmock, 'geonetwork/srv/eng/csw.rdf', 'geonetworkv4-page-{}.xml')
757
+ url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetworkv4-page-{}.xml")
708
758
  get_mock = rmock.post(url)
709
759
  org = OrganizationFactory()
710
- source = HarvestSourceFactory(backend='csw-dcat',
711
- url=url,
712
- organization=org)
760
+ source = HarvestSourceFactory(backend="csw-dcat", url=url, organization=org)
713
761
 
714
762
  actions.run(source.slug)
715
763
 
716
- assert 'User-Agent' in get_mock.last_request.headers
717
- assert get_mock.last_request.headers['User-Agent'] == 'uData/0.1 csw-dcat'
764
+ assert "User-Agent" in get_mock.last_request.headers
765
+ assert get_mock.last_request.headers["User-Agent"] == "uData/0.1 csw-dcat"
718
766
 
719
767
 
720
- @pytest.mark.usefixtures('clean_db')
721
- @pytest.mark.options(PLUGINS=['csw'])
768
+ @pytest.mark.usefixtures("clean_db")
769
+ @pytest.mark.options(PLUGINS=["csw"])
722
770
  class CswIso19139DcatBackendTest:
723
-
724
771
  def test_geo2france(self, rmock):
725
-
726
772
  with open(os.path.join(CSW_DCAT_FILES_DIR, "XSLT.xml"), "r") as f:
727
773
  xslt = f.read()
728
- url = mock_csw_pagination(rmock, 'geonetwork/srv/eng/csw.rdf', 'geonetwork-iso-page-{}.xml')
774
+ url = mock_csw_pagination(rmock, "geonetwork/srv/eng/csw.rdf", "geonetwork-iso-page-{}.xml")
729
775
  rmock.get(CswIso19139DcatBackend.XSL_URL, text=xslt)
730
776
  org = OrganizationFactory()
731
- source = HarvestSourceFactory(backend='csw-iso-19139',
732
- url=url,
733
- organization=org)
777
+ source = HarvestSourceFactory(backend="csw-iso-19139", url=url, organization=org)
734
778
 
735
779
  actions.run(source.slug)
736
780
 
@@ -745,19 +789,44 @@ class CswIso19139DcatBackendTest:
745
789
 
746
790
  # First dataset
747
791
  # dataset identifier is gmd:RS_Identifier > gmd:codeSpace + gmd:code
748
- dataset = datasets['http://catalogue.geo-ide.developpement-durable.gouv.fr/fr-120066022-orphan-residentifier-140d31c6-643d-42a9-85df-2737a118e144']
792
+ dataset = datasets[
793
+ "http://catalogue.geo-ide.developpement-durable.gouv.fr/fr-120066022-orphan-residentifier-140d31c6-643d-42a9-85df-2737a118e144"
794
+ ]
749
795
  assert dataset.title == "Plan local d'urbanisme de la commune de Cartigny"
750
- assert dataset.description == "Le présent standard de données COVADIS concerne les documents de plans locaux d'urbanisme (PLU) et les plans d'occupation des sols (POS qui valent PLU)."
751
- assert set(dataset.tags) == set([
752
- 'amenagement-urbanisme-zonages-planification', 'cartigny',
753
- 'document-durbanisme', 'donnees-ouvertes', 'plu', 'usage-des-sols'
754
- ])
796
+ assert (
797
+ dataset.description
798
+ == "Le présent standard de données COVADIS concerne les documents de plans locaux d'urbanisme (PLU) et les plans d'occupation des sols (POS qui valent PLU)."
799
+ )
800
+ assert set(dataset.tags) == set(
801
+ [
802
+ "amenagement-urbanisme-zonages-planification",
803
+ "cartigny",
804
+ "document-durbanisme",
805
+ "donnees-ouvertes",
806
+ "plu",
807
+ "usage-des-sols",
808
+ ]
809
+ )
755
810
  assert dataset.harvest.created_at.date() == date(2017, 10, 7)
756
- assert dataset.spatial.geom == {'type': 'MultiPolygon', 'coordinates':
757
- [[[[3.28133559, 50.48188019], [1.31279111, 50.48188019], [1.31279111, 49.38547516], [3.28133559, 49.38547516], [3.28133559, 50.48188019]]]]
811
+ assert dataset.spatial.geom == {
812
+ "type": "MultiPolygon",
813
+ "coordinates": [
814
+ [
815
+ [
816
+ [3.28133559, 50.48188019],
817
+ [1.31279111, 50.48188019],
818
+ [1.31279111, 49.38547516],
819
+ [3.28133559, 49.38547516],
820
+ [3.28133559, 50.48188019],
821
+ ]
822
+ ]
823
+ ],
758
824
  }
759
- assert dataset.contact_point.name == 'DDTM 80 (Direction Départementale des Territoires et de la Mer de la Somme)'
760
- assert dataset.contact_point.email == 'ddtm-sap-bsig@somme.gouv.fr'
825
+ assert (
826
+ dataset.contact_point.name
827
+ == "DDTM 80 (Direction Départementale des Territoires et de la Mer de la Somme)"
828
+ )
829
+ assert dataset.contact_point.email == "ddtm-sap-bsig@somme.gouv.fr"
761
830
 
762
831
  # License is not properly mapped in XSLT conversion
763
832
  assert dataset.license is None
@@ -767,8 +836,11 @@ class CswIso19139DcatBackendTest:
767
836
  # (See mapping at: https://semiceu.github.io/GeoDCAT-AP/releases/2.0.0/#resource-locator---on-line-resource)
768
837
  assert len(dataset.resources) == 1
769
838
  resource = dataset.resources[0]
770
- assert resource.title == 'Téléchargement direct du lot et des documents associés'
771
- assert resource.url == 'http://atom.geo-ide.developpement-durable.gouv.fr/atomArchive/GetResource?id=fr-120066022-ldd-cab63273-b3ae-4e8a-ae1c-6192e45faa94&datasetAggregate=true'
772
-
839
+ assert resource.title == "Téléchargement direct du lot et des documents associés"
840
+ assert (
841
+ resource.url
842
+ == "http://atom.geo-ide.developpement-durable.gouv.fr/atomArchive/GetResource?id=fr-120066022-ldd-cab63273-b3ae-4e8a-ae1c-6192e45faa94&datasetAggregate=true"
843
+ )
844
+
773
845
  # Sadly resource format is parsed as a blank node. Format parsing should be improved.
774
- assert re.match(r'n[0-9a-f]{32}', resource.format)
846
+ assert re.match(r"n[0-9a-f]{32}", resource.format)