udata-hydra 2.2.2.dev7533__tar.gz → 2.2.2.dev7551__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/PKG-INFO +2 -2
  2. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/pyproject.toml +2 -2
  3. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/csv.py +3 -1
  4. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/helpers.py +9 -4
  5. udata_hydra-2.2.2.dev7551/udata_hydra/migrations/csv/20250626_delete_datetime_iso_references.sql +8 -0
  6. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/README.md +0 -0
  7. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/__init__.py +0 -0
  8. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/__init__.py +0 -0
  9. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/geojson.py +0 -0
  10. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/resource.py +0 -0
  11. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/app.py +0 -0
  12. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/cli.py +0 -0
  13. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/config_default.toml +0 -0
  14. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/context.py +0 -0
  15. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/__init__.py +0 -0
  16. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/calculate_next_check.py +0 -0
  17. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/check_resources.py +0 -0
  18. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/helpers.py +0 -0
  19. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/preprocess_check_data.py +0 -0
  20. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/select_batch.py +0 -0
  21. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/__init__.py +0 -0
  22. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/check.py +0 -0
  23. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/resource.py +0 -0
  24. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/resource_exception.py +0 -0
  25. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/logger.py +0 -0
  26. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/__init__.py +0 -0
  27. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
  28. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
  29. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
  30. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
  31. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20250610_migrate_resources_exception.sql +0 -0
  32. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
  33. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
  34. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
  35. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
  36. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
  37. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
  38. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
  39. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
  40. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
  41. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
  42. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
  43. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
  44. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
  45. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
  46. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
  47. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
  48. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
  49. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +0 -0
  50. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241025_add_next_check_column.sql +0 -0
  51. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250108_add_indexes.sql +0 -0
  52. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250130_add_pmtiles_fields.sql +0 -0
  53. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250519_add_format_column_catalog.sql +0 -0
  54. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250610_migrate_resources_exception.sql +0 -0
  55. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/__init__.py +0 -0
  56. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/checks.py +0 -0
  57. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/resources.py +0 -0
  58. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/resources_exceptions.py +0 -0
  59. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/status.py +0 -0
  60. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/__init__.py +0 -0
  61. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/check.py +0 -0
  62. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/resource.py +0 -0
  63. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/resource_exception.py +0 -0
  64. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/__init__.py +0 -0
  65. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/auth.py +0 -0
  66. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/csv.py +0 -0
  67. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/db.py +0 -0
  68. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/errors.py +0 -0
  69. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/file.py +0 -0
  70. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/geojson.py +0 -0
  71. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/http.py +0 -0
  72. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/minio.py +0 -0
  73. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/parquet.py +0 -0
  74. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/queue.py +0 -0
  75. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/reader.py +0 -0
  76. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/timer.py +0 -0
  77. {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/worker.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: udata-hydra
3
- Version: 2.2.2.dev7533
3
+ Version: 2.2.2.dev7551
4
4
  Summary: Async crawler and parsing service for data.gouv.fr
5
5
  License: MIT
6
6
  Author: Opendata Team
@@ -18,7 +18,7 @@ Requires-Dist: aioresponses (>=0.7.3) ; extra == "dev"
18
18
  Requires-Dist: asyncpg (>=0.29.0)
19
19
  Requires-Dist: bumpx (>=0.3.10) ; extra == "dev"
20
20
  Requires-Dist: coloredlogs (>=15.0.1)
21
- Requires-Dist: csv-detective (==0.8.0)
21
+ Requires-Dist: csv-detective (==0.8.1.dev1549)
22
22
  Requires-Dist: dateparser (>=1.1.7)
23
23
  Requires-Dist: gunicorn (>=20.1.0) ; extra == "dev"
24
24
  Requires-Dist: humanfriendly (>=10.0)
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "udata-hydra"
3
- version = "2.2.2.dev7533"
3
+ version = "2.2.2.dev7551"
4
4
  description = "Async crawler and parsing service for data.gouv.fr"
5
5
  authors = [{ name = "Opendata Team", email = "opendatateam@data.gouv.fr" }]
6
6
  dependencies = [
@@ -8,7 +8,7 @@ dependencies = [
8
8
  "aiohttp>=3.10.3",
9
9
  "asyncpg>=0.29.0",
10
10
  "coloredlogs>=15.0.1",
11
- "csv-detective==0.8.0",
11
+ "csv-detective==0.8.1.dev1549",
12
12
  "dateparser>=1.1.7",
13
13
  "humanfriendly>=10.0",
14
14
  "marshmallow>=3.14.1",
@@ -68,6 +68,7 @@ PYTHON_TYPE_TO_PG = {
68
68
  "json": JSON,
69
69
  "date": Date,
70
70
  "datetime": DateTime,
71
+ "datetime_aware": DateTime(timezone=True),
71
72
  }
72
73
 
73
74
  PYTHON_TYPE_TO_PY = {
@@ -78,6 +79,7 @@ PYTHON_TYPE_TO_PY = {
78
79
  "json": helpers.to_json,
79
80
  "date": helpers.to_date,
80
81
  "datetime": helpers.to_datetime,
82
+ "datetime_aware": helpers.to_datetime,
81
83
  }
82
84
 
83
85
  RESERVED_COLS = ("__id", "cmin", "cmax", "collation", "ctid", "tableoid", "xmin", "xmax")
@@ -376,7 +378,7 @@ async def csv_to_db(
376
378
 
377
379
  # build a `column_name: type` mapping and explicitly rename reserved column names
378
380
  columns = {
379
- f"{c}__hydra_renamed" if c.lower() in RESERVED_COLS else c: v["python_type"]
381
+ f"{c}__hydra_renamed" if c.lower() in RESERVED_COLS else c: helpers.get_python_type(v)
380
382
  for c, v in inspection["columns"].items()
381
383
  }
382
384
 
@@ -16,7 +16,7 @@ def to_json(value: str) -> str:
16
16
  return value
17
17
 
18
18
 
19
- def _parse_dt(value: str) -> datetime | None:
19
+ def to_datetime(value: str) -> datetime | None:
20
20
  """For performance reasons, we try first with dateutil and fallback on dateparser"""
21
21
  try:
22
22
  return dateutil_parser(value)
@@ -25,12 +25,17 @@ def _parse_dt(value: str) -> datetime | None:
25
25
 
26
26
 
27
27
  def to_date(value: str) -> date | None:
28
- parsed = _parse_dt(value)
28
+ parsed = to_datetime(value)
29
29
  return parsed.date() if parsed else None
30
30
 
31
31
 
32
- def to_datetime(value: str) -> datetime | None:
33
- return _parse_dt(value)
32
+ def get_python_type(column: dict) -> str:
33
+ """Outsourcing the distinction of aware datetimes"""
34
+ return (
35
+ "datetime_aware"
36
+ if column["format"] in {"datetime_aware", "datetime_rfc822"}
37
+ else column["python_type"]
38
+ )
34
39
 
35
40
 
36
41
  async def read_or_download_file(
@@ -0,0 +1,8 @@
1
+ -- the `datetime_iso` format is not in csv-detective anymore (https://github.com/datagouv/csv-detective/pull/132)
2
+ -- to prevent crashes, we replace all references to this format with `datetime_aware`
3
+ -- NB: it doesn't matter whether it's the right format; if it's not, validation will fail
4
+ -- and a new analysis will find the right format
5
+
6
+ UPDATE tables_index
7
+ SET csv_detective = replace(csv_detective::text, 'datetime_iso', 'datetime_aware')::jsonb
8
+ WHERE csv_detective::text LIKE '%datetime_iso%';