udata-hydra 2.4.1.dev9596__tar.gz → 2.4.2.dev9635__tar.gz

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.
Files changed (79)
  1. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/PKG-INFO +1 -1
  2. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/pyproject.toml +1 -1
  3. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/__init__.py +7 -0
  4. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/config_default.toml +1 -1
  5. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/crawl/check_resources.py +2 -1
  6. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/routes/checks.py +2 -1
  7. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/file.py +1 -1
  8. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/http.py +1 -0
  9. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/README.md +0 -0
  10. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/analysis/__init__.py +0 -0
  11. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/analysis/csv.py +0 -0
  12. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/analysis/geojson.py +0 -0
  13. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/analysis/helpers.py +0 -0
  14. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/analysis/resource.py +0 -0
  15. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/app.py +0 -0
  16. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/cli.py +0 -0
  17. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/context.py +0 -0
  18. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/crawl/__init__.py +0 -0
  19. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/crawl/calculate_next_check.py +0 -0
  20. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/crawl/helpers.py +0 -0
  21. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/crawl/preprocess_check_data.py +0 -0
  22. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/crawl/select_batch.py +0 -0
  23. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/db/__init__.py +0 -0
  24. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/db/check.py +0 -0
  25. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/db/resource.py +0 -0
  26. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/db/resource_exception.py +0 -0
  27. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/logger.py +0 -0
  28. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/__init__.py +0 -0
  29. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
  30. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
  31. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
  32. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
  33. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20250610_migrate_resources_exception.sql +0 -0
  34. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20250626_delete_datetime_iso_references.sql +0 -0
  35. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/csv/20250902_delete_analyses_too_long_column_names.sql +0 -0
  36. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
  37. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
  38. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
  39. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
  40. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
  41. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
  42. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
  43. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
  44. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
  45. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
  46. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
  47. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
  48. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
  49. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
  50. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
  51. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
  52. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
  53. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +0 -0
  54. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20241025_add_next_check_column.sql +0 -0
  55. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20250108_add_indexes.sql +0 -0
  56. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20250130_add_pmtiles_fields.sql +0 -0
  57. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20250519_add_format_column_catalog.sql +0 -0
  58. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20250610_migrate_resources_exception.sql +0 -0
  59. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20250611_add_status_since_catalog.sql +0 -0
  60. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/migrations/main/20250615_add_geojson_fields.sql +0 -0
  61. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/routes/__init__.py +0 -0
  62. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/routes/resources.py +0 -0
  63. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/routes/resources_exceptions.py +0 -0
  64. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/routes/status.py +0 -0
  65. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/schemas/__init__.py +0 -0
  66. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/schemas/check.py +0 -0
  67. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/schemas/resource.py +0 -0
  68. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/schemas/resource_exception.py +0 -0
  69. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/__init__.py +0 -0
  70. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/auth.py +0 -0
  71. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/csv.py +0 -0
  72. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/db.py +0 -0
  73. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/errors.py +0 -0
  74. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/geojson.py +0 -0
  75. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/minio.py +0 -0
  76. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/parquet.py +0 -0
  77. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/queue.py +0 -0
  78. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/utils/timer.py +0 -0
  79. {udata_hydra-2.4.1.dev9596 → udata_hydra-2.4.2.dev9635}/udata_hydra/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: udata-hydra
3
- Version: 2.4.1.dev9596
3
+ Version: 2.4.2.dev9635
4
4
  Summary: Async crawler and parsing service for data.gouv.fr
5
5
  License: MIT
6
6
  Author: Opendata Team
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "udata-hydra"
3
- version = "2.4.1.dev9596"
3
+ version = "2.4.2.dev9635"
4
4
  description = "Async crawler and parsing service for data.gouv.fr"
5
5
  authors = [{ name = "Opendata Team", email = "opendatateam@data.gouv.fr" }]
6
6
  dependencies = [
@@ -49,5 +49,12 @@ class Configurator:
49
49
  def __dict__(self):
50
50
  return self.configuration
51
51
 
52
+ @property
53
+ def USER_AGENT_FULL(self) -> str:
54
+ """Build the complete user agent string with version"""
55
+ if self.USER_AGENT and self.APP_VERSION:
56
+ return f"{self.USER_AGENT}/{self.APP_VERSION}"
57
+ return "udata-hydra"
58
+
52
59
 
53
60
  config = Configurator()
@@ -13,7 +13,7 @@ SENTRY_SAMPLE_RATE = 1.0
13
13
  TESTING = false
14
14
  # max postgres pool size
15
15
  MAX_POOL_SIZE = 50
16
- USER_AGENT = "udata-hydra/1.0"
16
+ USER_AGENT = "udata-hydra" # without version - version is dynamically added
17
17
  NAMEDATALEN = 64 # should be set to the same value as in Postgres, but we can't query it
18
18
 
19
19
  API_KEY = "hydra_api_key_to_change"
@@ -36,7 +36,8 @@ async def check_batch_resources(to_parse: list[Record]) -> None:
36
36
  context.monitor().set_status("Checking resources...")
37
37
  tasks: list = []
38
38
  async with aiohttp.ClientSession(
39
- timeout=None, headers={"user-agent": config.USER_AGENT}
39
+ timeout=None,
40
+ headers={"user-agent": config.USER_AGENT_FULL},
40
41
  ) as session:
41
42
  for row in to_parse:
42
43
  tasks.append(
@@ -77,7 +77,8 @@ async def create_check(request: web.Request) -> web.Response:
77
77
  context.monitor().set_status(f'Crawling url "{url}"...')
78
78
 
79
79
  async with aiohttp.ClientSession(
80
- timeout=None, headers={"user-agent": config.USER_AGENT}
80
+ timeout=None,
81
+ headers={"user-agent": config.USER_AGENT_FULL},
81
82
  ) as session:
82
83
  status: str = await check_resource(
83
84
  url=url,
@@ -60,7 +60,7 @@ async def download_resource(
60
60
  too_large, download_error = False, None
61
61
  try:
62
62
  async with aiohttp.ClientSession(
63
- headers={"user-agent": config.USER_AGENT} if config.USER_AGENT else None,
63
+ headers={"user-agent": config.USER_AGENT_FULL},
64
64
  raise_for_status=True,
65
65
  ) as session:
66
66
  async with session.get(url, allow_redirects=True) as response:
@@ -79,6 +79,7 @@ async def send(dataset_id: str, resource_id: str, document: UdataPayload) -> Non
79
79
 
80
80
  uri = f"{config.UDATA_URI}/datasets/{dataset_id}/resources/{resource_id}/extras/"
81
81
  headers = {
82
+ "user-agent": config.USER_AGENT_FULL,
82
83
  "content-type": "application/json",
83
84
  "X-API-KEY": config.UDATA_URI_API_KEY,
84
85
  }