udata-hydra 2.2.1.dev7347__tar.gz → 2.2.1.dev7367__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/PKG-INFO +1 -1
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/pyproject.toml +1 -1
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/cli.py +69 -11
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/README.md +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/csv.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/geojson.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/helpers.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/analysis/resource.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/app.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/config_default.toml +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/context.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/calculate_next_check.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/check_resources.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/helpers.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/preprocess_check_data.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/select_batch.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/check.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/resource.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/resource_exception.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/logger.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20241025_add_next_check_column.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20250108_add_indexes.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/migrations/main/20250130_add_pmtiles_fields.sql +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/checks.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/resources.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/resources_exceptions.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/status.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/check.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/resource.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/resource_exception.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/__init__.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/auth.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/csv.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/db.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/errors.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/file.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/geojson.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/http.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/minio.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/parquet.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/queue.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/reader.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/utils/timer.py +0 -0
- {udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/worker.py +0 -0
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import csv
|
|
2
2
|
import logging
|
|
3
3
|
import os
|
|
4
|
-
from datetime import datetime, timezone
|
|
4
|
+
from datetime import datetime, timedelta, timezone
|
|
5
5
|
from pathlib import Path
|
|
6
6
|
from tempfile import NamedTemporaryFile
|
|
7
7
|
|
|
@@ -360,23 +360,27 @@ async def purge_csv_tables(quiet: bool = False) -> None:
|
|
|
360
360
|
ON checks.parsing_table = md5(c.url)
|
|
361
361
|
WHERE checks.parsing_table IS NOT NULL AND (c.id IS NULL OR c.deleted = TRUE);
|
|
362
362
|
"""
|
|
363
|
-
|
|
364
|
-
res: list[Record] = await
|
|
363
|
+
conn_main = await connection()
|
|
364
|
+
res: list[Record] = await conn_main.fetch(q)
|
|
365
365
|
tables_to_delete: list[str] = [r["parsing_table"] for r in res]
|
|
366
366
|
|
|
367
367
|
success_count = 0
|
|
368
368
|
error_count = 0
|
|
369
369
|
|
|
370
|
+
conn_csv = await connection(db_name="csv")
|
|
370
371
|
for table in tables_to_delete:
|
|
371
372
|
try:
|
|
372
|
-
async with
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
373
|
+
async with conn_main.transaction():
|
|
374
|
+
async with conn_csv.transaction():
|
|
375
|
+
log.debug(f'Deleting table "{table}"')
|
|
376
|
+
await conn_csv.execute(f'DROP TABLE IF EXISTS "{table}"')
|
|
377
|
+
await conn_main.execute(
|
|
378
|
+
"DELETE FROM tables_index WHERE parsing_table = $1", table
|
|
379
|
+
)
|
|
380
|
+
await conn_main.execute(
|
|
381
|
+
"UPDATE checks SET parsing_table = NULL WHERE parsing_table = $1", table
|
|
382
|
+
)
|
|
383
|
+
success_count += 1
|
|
380
384
|
except Exception as e:
|
|
381
385
|
error_count += 1
|
|
382
386
|
log.error(f'Failed to delete table "{table}": {str(e)}')
|
|
@@ -439,6 +443,60 @@ async def insert_resource_into_catalog(resource_id: str):
|
|
|
439
443
|
raise e
|
|
440
444
|
|
|
441
445
|
|
|
446
|
+
@cli
|
|
447
|
+
async def purge_selected_csv_tables(
|
|
448
|
+
nb_days_to_keep: int | None = None,
|
|
449
|
+
nb_tables_to_keep: int | None = None,
|
|
450
|
+
quiet: bool = False,
|
|
451
|
+
) -> None:
|
|
452
|
+
"""Delete converted CSV tables either:
|
|
453
|
+
- if they're more than nb_days_to_keep days old
|
|
454
|
+
- if they're not in the top nb_tables_to_keep most recent
|
|
455
|
+
"""
|
|
456
|
+
if quiet:
|
|
457
|
+
log.setLevel(logging.ERROR)
|
|
458
|
+
|
|
459
|
+
assert nb_days_to_keep is not None or nb_tables_to_keep is not None
|
|
460
|
+
conn_csv = await connection(db_name="csv")
|
|
461
|
+
if nb_days_to_keep is not None:
|
|
462
|
+
threshold = datetime.now(timezone.utc) - timedelta(days=int(nb_days_to_keep))
|
|
463
|
+
q = """SELECT parsing_table FROM tables_index WHERE created_at <= $1"""
|
|
464
|
+
res: list[Record] = await conn_csv.fetch(q, threshold)
|
|
465
|
+
elif nb_tables_to_keep is not None:
|
|
466
|
+
q = """SELECT parsing_table FROM tables_index ORDER BY created_at DESC OFFSET $1"""
|
|
467
|
+
res: list[Record] = await conn_csv.fetch(q, int(nb_tables_to_keep))
|
|
468
|
+
|
|
469
|
+
tables_to_delete: list[str] = [r["parsing_table"] for r in res]
|
|
470
|
+
|
|
471
|
+
success_count = 0
|
|
472
|
+
error_count = 0
|
|
473
|
+
conn_main = await connection()
|
|
474
|
+
for table in tables_to_delete:
|
|
475
|
+
try:
|
|
476
|
+
async with conn_main.transaction():
|
|
477
|
+
async with conn_csv.transaction():
|
|
478
|
+
log.debug(f'Deleting table "{table}"')
|
|
479
|
+
await conn_csv.execute(f'DROP TABLE IF EXISTS "{table}"')
|
|
480
|
+
await conn_csv.execute(
|
|
481
|
+
"DELETE FROM tables_index WHERE parsing_table = $1", table
|
|
482
|
+
)
|
|
483
|
+
await conn_main.execute(
|
|
484
|
+
"UPDATE checks SET parsing_table = NULL WHERE parsing_table = $1", table
|
|
485
|
+
)
|
|
486
|
+
success_count += 1
|
|
487
|
+
except Exception as e:
|
|
488
|
+
error_count += 1
|
|
489
|
+
log.error(f'Failed to delete table "{table}": {str(e)}')
|
|
490
|
+
continue
|
|
491
|
+
|
|
492
|
+
if success_count:
|
|
493
|
+
log.info(f"Successfully deleted {success_count} table(s).")
|
|
494
|
+
if error_count:
|
|
495
|
+
log.warning(f"Failed to delete {error_count} table(s). Check logs for details.")
|
|
496
|
+
if not (success_count or error_count):
|
|
497
|
+
log.info("Nothing to delete.")
|
|
498
|
+
|
|
499
|
+
|
|
442
500
|
@wrap
|
|
443
501
|
async def cli_wrapper():
|
|
444
502
|
context["conn"] = {}
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/calculate_next_check.py
RENAMED
|
File without changes
|
{udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/check_resources.py
RENAMED
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/crawl/preprocess_check_data.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/db/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/routes/resources_exceptions.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.2.1.dev7347 → udata_hydra-2.2.1.dev7367}/udata_hydra/schemas/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|