udata-hydra 2.2.2.dev7533__tar.gz → 2.2.2.dev7551__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/PKG-INFO +2 -2
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/pyproject.toml +2 -2
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/csv.py +3 -1
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/helpers.py +9 -4
- udata_hydra-2.2.2.dev7551/udata_hydra/migrations/csv/20250626_delete_datetime_iso_references.sql +8 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/README.md +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/geojson.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/analysis/resource.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/app.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/cli.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/config_default.toml +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/context.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/calculate_next_check.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/check_resources.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/helpers.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/preprocess_check_data.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/select_batch.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/check.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/resource.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/resource_exception.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/logger.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/csv/20250610_migrate_resources_exception.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20241025_add_next_check_column.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250108_add_indexes.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250130_add_pmtiles_fields.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250519_add_format_column_catalog.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/migrations/main/20250610_migrate_resources_exception.sql +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/checks.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/resources.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/resources_exceptions.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/status.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/check.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/resource.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/resource_exception.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/__init__.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/auth.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/csv.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/db.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/errors.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/file.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/geojson.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/http.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/minio.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/parquet.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/queue.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/reader.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/utils/timer.py +0 -0
- {udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/worker.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.3
|
|
2
2
|
Name: udata-hydra
|
|
3
|
-
Version: 2.2.2.dev7533
|
|
3
|
+
Version: 2.2.2.dev7551
|
|
4
4
|
Summary: Async crawler and parsing service for data.gouv.fr
|
|
5
5
|
License: MIT
|
|
6
6
|
Author: Opendata Team
|
|
@@ -18,7 +18,7 @@ Requires-Dist: aioresponses (>=0.7.3) ; extra == "dev"
|
|
|
18
18
|
Requires-Dist: asyncpg (>=0.29.0)
|
|
19
19
|
Requires-Dist: bumpx (>=0.3.10) ; extra == "dev"
|
|
20
20
|
Requires-Dist: coloredlogs (>=15.0.1)
|
|
21
|
-
Requires-Dist: csv-detective (==0.8.
|
|
21
|
+
Requires-Dist: csv-detective (==0.8.1.dev1549)
|
|
22
22
|
Requires-Dist: dateparser (>=1.1.7)
|
|
23
23
|
Requires-Dist: gunicorn (>=20.1.0) ; extra == "dev"
|
|
24
24
|
Requires-Dist: humanfriendly (>=10.0)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[project]
|
|
2
2
|
name = "udata-hydra"
|
|
3
|
-
version = "2.2.2.dev7533"
|
|
3
|
+
version = "2.2.2.dev7551"
|
|
4
4
|
description = "Async crawler and parsing service for data.gouv.fr"
|
|
5
5
|
authors = [{ name = "Opendata Team", email = "opendatateam@data.gouv.fr" }]
|
|
6
6
|
dependencies = [
|
|
@@ -8,7 +8,7 @@ dependencies = [
|
|
|
8
8
|
"aiohttp>=3.10.3",
|
|
9
9
|
"asyncpg>=0.29.0",
|
|
10
10
|
"coloredlogs>=15.0.1",
|
|
11
|
-
"csv-detective==0.8.
|
|
11
|
+
"csv-detective==0.8.1.dev1549",
|
|
12
12
|
"dateparser>=1.1.7",
|
|
13
13
|
"humanfriendly>=10.0",
|
|
14
14
|
"marshmallow>=3.14.1",
|
|
@@ -68,6 +68,7 @@ PYTHON_TYPE_TO_PG = {
|
|
|
68
68
|
"json": JSON,
|
|
69
69
|
"date": Date,
|
|
70
70
|
"datetime": DateTime,
|
|
71
|
+
"datetime_aware": DateTime(timezone=True),
|
|
71
72
|
}
|
|
72
73
|
|
|
73
74
|
PYTHON_TYPE_TO_PY = {
|
|
@@ -78,6 +79,7 @@ PYTHON_TYPE_TO_PY = {
|
|
|
78
79
|
"json": helpers.to_json,
|
|
79
80
|
"date": helpers.to_date,
|
|
80
81
|
"datetime": helpers.to_datetime,
|
|
82
|
+
"datetime_aware": helpers.to_datetime,
|
|
81
83
|
}
|
|
82
84
|
|
|
83
85
|
RESERVED_COLS = ("__id", "cmin", "cmax", "collation", "ctid", "tableoid", "xmin", "xmax")
|
|
@@ -376,7 +378,7 @@ async def csv_to_db(
|
|
|
376
378
|
|
|
377
379
|
# build a `column_name: type` mapping and explicitely rename reserved column names
|
|
378
380
|
columns = {
|
|
379
|
-
f"{c}__hydra_renamed" if c.lower() in RESERVED_COLS else c: v
|
|
381
|
+
f"{c}__hydra_renamed" if c.lower() in RESERVED_COLS else c: helpers.get_python_type(v)
|
|
380
382
|
for c, v in inspection["columns"].items()
|
|
381
383
|
}
|
|
382
384
|
|
|
@@ -16,7 +16,7 @@ def to_json(value: str) -> str:
|
|
|
16
16
|
return value
|
|
17
17
|
|
|
18
18
|
|
|
19
|
-
def _parse_dt(value: str) -> datetime | None:
|
|
19
|
+
def to_datetime(value: str) -> datetime | None:
|
|
20
20
|
"""For performance reasons, we try first with dateutil and fallback on dateparser"""
|
|
21
21
|
try:
|
|
22
22
|
return dateutil_parser(value)
|
|
@@ -25,12 +25,17 @@ def _parse_dt(value: str) -> datetime | None:
|
|
|
25
25
|
|
|
26
26
|
|
|
27
27
|
def to_date(value: str) -> date | None:
|
|
28
|
-
parsed = _parse_dt(value)
|
|
28
|
+
parsed = to_datetime(value)
|
|
29
29
|
return parsed.date() if parsed else None
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
def
|
|
33
|
-
|
|
32
|
+
def get_python_type(column: dict) -> str:
|
|
33
|
+
"""Outsourcing the distinction of aware datetimes"""
|
|
34
|
+
return (
|
|
35
|
+
"datetime_aware"
|
|
36
|
+
if column["format"] in {"datetime_aware", "datetime_rfc822"}
|
|
37
|
+
else column["python_type"]
|
|
38
|
+
)
|
|
34
39
|
|
|
35
40
|
|
|
36
41
|
async def read_or_download_file(
|
udata_hydra-2.2.2.dev7551/udata_hydra/migrations/csv/20250626_delete_datetime_iso_references.sql
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- the `datetime_iso` format is not in csv-detective anymore (https://github.com/datagouv/csv-detective/pull/132)
|
|
2
|
+
-- to prevent crashes, we replace all references to this format with `datetime_aware`
|
|
3
|
+
-- NB: it doesn't matter if it's the right format, if it's not, validation will fail
|
|
4
|
+
-- and a new analysis will find the right format
|
|
5
|
+
|
|
6
|
+
UPDATE tables_index
|
|
7
|
+
SET csv_detective = replace(csv_detective::text, 'datetime_iso', 'datetime_aware')::jsonb
|
|
8
|
+
WHERE csv_detective::text LIKE '%datetime_iso%';
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/calculate_next_check.py
RENAMED
|
File without changes
|
{udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/check_resources.py
RENAMED
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/crawl/preprocess_check_data.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/db/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/routes/resources_exceptions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.2.dev7533 → udata_hydra-2.2.2.dev7551}/udata_hydra/schemas/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|