udata-hydra 2.0.5.dev5384__tar.gz → 2.0.5.dev5485__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68) hide show
  1. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/PKG-INFO +1 -1
  2. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/pyproject.toml +1 -1
  3. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/analysis/csv.py +5 -5
  4. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/analysis/resource.py +1 -1
  5. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/crawl/helpers.py +5 -3
  6. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/routes/status.py +1 -1
  7. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/auth.py +5 -5
  8. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/README.md +0 -0
  9. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/__init__.py +0 -0
  10. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/analysis/__init__.py +0 -0
  11. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/analysis/errors.py +0 -0
  12. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/analysis/helpers.py +0 -0
  13. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/app.py +0 -0
  14. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/cli.py +0 -0
  15. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/config_default.toml +0 -0
  16. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/context.py +0 -0
  17. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/crawl/__init__.py +0 -0
  18. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/crawl/check_resources.py +0 -0
  19. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/crawl/process_check_data.py +0 -0
  20. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/crawl/select_batch.py +0 -0
  21. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/db/__init__.py +0 -0
  22. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/db/check.py +0 -0
  23. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/db/resource.py +0 -0
  24. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/db/resource_exception.py +0 -0
  25. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/logger.py +0 -0
  26. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/__init__.py +0 -0
  27. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
  28. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
  29. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
  30. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
  31. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
  32. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
  33. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
  34. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
  35. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
  36. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
  37. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
  38. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
  39. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
  40. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
  41. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
  42. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
  43. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
  44. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
  45. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
  46. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
  47. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
  48. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +0 -0
  49. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/routes/__init__.py +0 -0
  50. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/routes/checks.py +0 -0
  51. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/routes/resources.py +0 -0
  52. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/routes/resources_exceptions.py +0 -0
  53. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/routes/resources_legacy.py +0 -0
  54. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/schemas/__init__.py +0 -0
  55. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/schemas/check.py +0 -0
  56. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/schemas/resource.py +0 -0
  57. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/schemas/resource_exception.py +0 -0
  58. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/__init__.py +0 -0
  59. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/csv.py +0 -0
  60. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/db.py +0 -0
  61. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/file.py +0 -0
  62. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/http.py +0 -0
  63. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/minio.py +0 -0
  64. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/parquet.py +0 -0
  65. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/queue.py +0 -0
  66. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/reader.py +0 -0
  67. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/utils/timer.py +0 -0
  68. {udata_hydra-2.0.5.dev5384 → udata_hydra-2.0.5.dev5485}/udata_hydra/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: udata-hydra
3
- Version: 2.0.5.dev5384
3
+ Version: 2.0.5.dev5485
4
4
  Summary: Async crawler and parsing service for data.gouv.fr
5
5
  License: MIT
6
6
  Author: Opendata Team
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "udata-hydra"
3
- version = "2.0.5.dev5384"
3
+ version = "2.0.5.dev5485"
4
4
  description = "Async crawler and parsing service for data.gouv.fr"
5
5
  authors = ["Opendata Team <opendatateam@data.gouv.fr>"]
6
6
  license = "MIT"
@@ -285,13 +285,13 @@ async def csv_to_parquet(
285
285
  Convert a csv file to parquet using inspection data.
286
286
 
287
287
  Args:
288
- :file_path: CSV file path to convert
289
- :inspection: CSV detective report
290
- :table_name: used to name the parquet file
288
+ file_path: CSV file path to convert.
289
+ inspection: CSV detective report.
290
+ table_name: used to name the parquet file.
291
291
 
292
292
  Returns:
293
- :parquet_url: URL of the parquet file
294
- :parquet_size: size of the parquet file
293
+ parquet_url: URL of the parquet file.
294
+ parquet_size: size of the parquet file.
295
295
  """
296
296
  if not config.CSV_TO_PARQUET:
297
297
  log.debug("CSV_TO_PARQUET turned off, skipping parquet export.")
@@ -260,7 +260,7 @@ async def detect_resource_change_on_early_hints(
260
260
 
261
261
 
262
262
  async def detect_resource_change_from_harvest(
263
- checks_data: dict, resource_id: str
263
+ checks_data: list, resource_id: str
264
264
  ) -> tuple[Change, dict | None]:
265
265
  """
266
266
  Checks if resource has a harvest.modified_at
@@ -22,8 +22,7 @@ async def get_content_type_from_header(headers: dict) -> str:
22
22
 
23
23
 
24
24
  def convert_headers(headers: CIMultiDictProxy) -> dict:
25
- """
26
- Convert headers from aiohttp CIMultiDict type to dict type
25
+ """Convert headers from aiohttp CIMultiDict type to dict type.
27
26
 
28
27
  :warning: this will only take the first value for a given header key but multidict is not json serializable
29
28
  """
@@ -68,7 +67,10 @@ async def is_domain_backoff(domain: str) -> tuple[bool, str]:
68
67
  """Check if we should not crawl on this domain, in order to avoid 429 errors/bans as much as we can. We backoff if:
69
68
  - we have hit a 429
70
69
  - we have hit the rate limit on our side
71
- Returns a tuple with if it should backoff or not (boolean) and the reason why (string)
70
+
71
+ Returns:
72
+ A boolean indicating if it should backoff or not
73
+ A string with the message why we should backoff
72
74
  """
73
75
  backoff: tuple = (False, "")
74
76
 
@@ -95,7 +95,7 @@ async def get_stats(request: web.Request) -> web.Response:
95
95
  """
96
96
  stats_status = await request.app["pool"].fetchrow(q)
97
97
 
98
- def cmp_rate(key):
98
+ def cmp_rate(key: str) -> float | int:
99
99
  if stats_catalog["count_checked"] == 0:
100
100
  return 0
101
101
  return round(stats_status[key] / stats_catalog["count_checked"] * 100, 1)
@@ -24,13 +24,13 @@ def token_auth_middleware(
24
24
  Token auth middleware that checks the "Authorization" http header for a token and, if the token in the request headers is valid, adds the user to the request under the key given by the "request_property" variable; otherwise it raises an HTTPForbidden exception.
25
25
 
26
26
  Args:
27
- request_property (str, optional): Key for save in request object.
27
+ request_property: Key for save in request object.
28
28
  Defaults to 'user'.
29
- auth_scheme (str, optional): Prefix for value in "Authorization" header.
29
+ auth_scheme: Prefix for value in "Authorization" header.
30
30
  Defaults to 'Bearer'.
31
- exclude_routes: (tuple, optional): Tuple of pathes that will be excluded.
31
+ exclude_routes: Tuple of paths that will be excluded.
32
32
  Defaults to empty tuple.
33
- exclude_methods(tuple, optional): Tuple of http methods that will be
33
+ exclude_methods: Tuple of http methods that will be
34
34
  excluded. Defaults to empty tuple.
35
35
 
36
36
  Raises:
@@ -38,7 +38,7 @@ def token_auth_middleware(
38
38
  web.HTTPForbidden: Wrong token, schema or header.
39
39
 
40
40
  Returns:
41
- Coroutine: Aiohttp middleware.
41
+ Aiohttp middleware.
42
42
  """
43
43
 
44
44
  @web.middleware