udata-hydra 2.2.1.dev7337__tar.gz → 2.2.1.dev7367__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/PKG-INFO +1 -1
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/pyproject.toml +1 -1
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/geojson.py +0 -1
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/cli.py +84 -12
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/README.md +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/csv.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/helpers.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/resource.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/app.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/config_default.toml +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/context.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/calculate_next_check.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/check_resources.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/helpers.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/preprocess_check_data.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/select_batch.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/check.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/resource.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/resource_exception.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/logger.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241025_add_next_check_column.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20250108_add_indexes.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20250130_add_pmtiles_fields.sql +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/checks.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/resources.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/resources_exceptions.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/status.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/check.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/resource.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/resource_exception.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/auth.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/csv.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/db.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/errors.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/file.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/geojson.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/http.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/minio.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/parquet.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/queue.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/reader.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/timer.py +0 -0
- {udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/worker.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
-
from datetime import datetime, timezone
|
|
4
|
+
from datetime import datetime, timedelta, timezone
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from tempfile import NamedTemporaryFile
|
|
7
7
|
|
|
@@ -15,6 +15,7 @@ from progressist import ProgressBar
|
|
|
15
15
|
from udata_hydra import config
|
|
16
16
|
from udata_hydra.analysis.csv import analyse_csv
|
|
17
17
|
from udata_hydra.analysis.geojson import analyse_geojson
|
|
18
|
+
from udata_hydra.analysis.resource import analyse_resource
|
|
18
19
|
from udata_hydra.crawl.check_resources import check_resource as crawl_check_resource
|
|
19
20
|
from udata_hydra.db.check import Check
|
|
20
21
|
from udata_hydra.db.resource import Resource
|
|
@@ -159,6 +160,16 @@ async def check_resource(resource_id: str, method: str = "get", force_analysis:
|
|
|
159
160
|
)
|
|
160
161
|
|
|
161
162
|
|
|
163
|
+
@cli(name="analyse-resource")
|
|
164
|
+
async def analyse_resource_cli(resource_id: str):
|
|
165
|
+
"""Trigger a resource analysis, mainly useful for local debug (with breakpoints)"""
|
|
166
|
+
check: Record | None = await Check.get_by_resource_id(resource_id)
|
|
167
|
+
if not check:
|
|
168
|
+
log.error("Could not find a check linked to the specified resource ID")
|
|
169
|
+
return
|
|
170
|
+
await analyse_resource(check=check, last_check=None, force_analysis=True)
|
|
171
|
+
|
|
172
|
+
|
|
162
173
|
@cli(name="analyse-csv")
|
|
163
174
|
async def analyse_csv_cli(
|
|
164
175
|
check_id: str | None = None,
|
|
@@ -349,23 +360,27 @@ async def purge_csv_tables(quiet: bool = False) -> None:
|
|
|
349
360
|
ON checks.parsing_table = md5(c.url)
|
|
350
361
|
WHERE checks.parsing_table IS NOT NULL AND (c.id IS NULL OR c.deleted = TRUE);
|
|
351
362
|
"""
|
|
352
|
-
|
|
353
|
-
res: list[Record] = await
|
|
363
|
+
conn_main = await connection()
|
|
364
|
+
res: list[Record] = await conn_main.fetch(q)
|
|
354
365
|
tables_to_delete: list[str] = [r["parsing_table"] for r in res]
|
|
355
366
|
|
|
356
367
|
success_count = 0
|
|
357
368
|
error_count = 0
|
|
358
369
|
|
|
370
|
+
conn_csv = await connection(db_name="csv")
|
|
359
371
|
for table in tables_to_delete:
|
|
360
372
|
try:
|
|
361
|
-
async with
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
373
|
+
async with conn_main.transaction():
|
|
374
|
+
async with conn_csv.transaction():
|
|
375
|
+
log.debug(f'Deleting table "{table}"')
|
|
376
|
+
await conn_csv.execute(f'DROP TABLE IF EXISTS "{table}"')
|
|
377
|
+
await conn_main.execute(
|
|
378
|
+
"DELETE FROM tables_index WHERE parsing_table = $1", table
|
|
379
|
+
)
|
|
380
|
+
await conn_main.execute(
|
|
381
|
+
"UPDATE checks SET parsing_table = NULL WHERE parsing_table = $1", table
|
|
382
|
+
)
|
|
383
|
+
success_count += 1
|
|
369
384
|
except Exception as e:
|
|
370
385
|
error_count += 1
|
|
371
386
|
log.error(f'Failed to delete table "{table}": {str(e)}')
|
|
@@ -417,7 +432,10 @@ async def insert_resource_into_catalog(resource_id: str):
|
|
|
417
432
|
datetime.fromisoformat(resource["resource"]["harvest"]["modified_at"]).replace(
|
|
418
433
|
tzinfo=timezone.utc
|
|
419
434
|
)
|
|
420
|
-
if
|
|
435
|
+
if (
|
|
436
|
+
resource["resource"].get("harvest") is not None
|
|
437
|
+
and resource["resource"]["harvest"].get("modified_at")
|
|
438
|
+
)
|
|
421
439
|
else None,
|
|
422
440
|
)
|
|
423
441
|
log.info(f"Resource {resource_id} successfully {action}ed into DB.")
|
|
@@ -425,6 +443,60 @@ async def insert_resource_into_catalog(resource_id: str):
|
|
|
425
443
|
raise e
|
|
426
444
|
|
|
427
445
|
|
|
446
|
+
@cli
|
|
447
|
+
async def purge_selected_csv_tables(
|
|
448
|
+
nb_days_to_keep: int | None = None,
|
|
449
|
+
nb_tables_to_keep: int | None = None,
|
|
450
|
+
quiet: bool = False,
|
|
451
|
+
) -> None:
|
|
452
|
+
"""Delete converted CSV tables either:
|
|
453
|
+
- if they're more than nb_days_to_keep days old
|
|
454
|
+
- if they're not in the top nb_table_to_keep most recent
|
|
455
|
+
"""
|
|
456
|
+
if quiet:
|
|
457
|
+
log.setLevel(logging.ERROR)
|
|
458
|
+
|
|
459
|
+
assert nb_days_to_keep is not None or nb_tables_to_keep is not None
|
|
460
|
+
conn_csv = await connection(db_name="csv")
|
|
461
|
+
if nb_days_to_keep is not None:
|
|
462
|
+
threshold = datetime.now(timezone.utc) - timedelta(days=int(nb_days_to_keep))
|
|
463
|
+
q = """SELECT parsing_table FROM tables_index WHERE created_at <= $1"""
|
|
464
|
+
res: list[Record] = await conn_csv.fetch(q, threshold)
|
|
465
|
+
elif nb_tables_to_keep is not None:
|
|
466
|
+
q = """SELECT parsing_table FROM tables_index ORDER BY created_at DESC OFFSET $1"""
|
|
467
|
+
res: list[Record] = await conn_csv.fetch(q, int(nb_tables_to_keep))
|
|
468
|
+
|
|
469
|
+
tables_to_delete: list[str] = [r["parsing_table"] for r in res]
|
|
470
|
+
|
|
471
|
+
success_count = 0
|
|
472
|
+
error_count = 0
|
|
473
|
+
conn_main = await connection()
|
|
474
|
+
for table in tables_to_delete:
|
|
475
|
+
try:
|
|
476
|
+
async with conn_main.transaction():
|
|
477
|
+
async with conn_csv.transaction():
|
|
478
|
+
log.debug(f'Deleting table "{table}"')
|
|
479
|
+
await conn_csv.execute(f'DROP TABLE IF EXISTS "{table}"')
|
|
480
|
+
await conn_csv.execute(
|
|
481
|
+
"DELETE FROM tables_index WHERE parsing_table = $1", table
|
|
482
|
+
)
|
|
483
|
+
await conn_main.execute(
|
|
484
|
+
"UPDATE checks SET parsing_table = NULL WHERE parsing_table = $1", table
|
|
485
|
+
)
|
|
486
|
+
success_count += 1
|
|
487
|
+
except Exception as e:
|
|
488
|
+
error_count += 1
|
|
489
|
+
log.error(f'Failed to delete table "{table}": {str(e)}')
|
|
490
|
+
continue
|
|
491
|
+
|
|
492
|
+
if success_count:
|
|
493
|
+
log.info(f"Successfully deleted {success_count} table(s).")
|
|
494
|
+
if error_count:
|
|
495
|
+
log.warning(f"Failed to delete {error_count} table(s). Check logs for details.")
|
|
496
|
+
if not (success_count or error_count):
|
|
497
|
+
log.info("Nothing to delete.")
|
|
498
|
+
|
|
499
|
+
|
|
428
500
|
@wrap
|
|
429
501
|
async def cli_wrapper():
|
|
430
502
|
context["conn"] = {}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/calculate_next_check.py
RENAMED
|
File without changes
|
{udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/check_resources.py
RENAMED
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/preprocess_check_data.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/resources_exceptions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7337 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|