udata-hydra 2.0.4.dev5074__tar.gz → 2.0.4.dev5083__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of udata-hydra might be problematic. Click here for more details.
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/PKG-INFO +1 -1
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/pyproject.toml +1 -1
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/select_batch.py +6 -5
- udata_hydra-2.0.4.dev5083/udata_hydra/migrations/main/20241023_alter_foreign_key.sql +8 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/README.md +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/analysis/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/analysis/csv.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/analysis/errors.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/analysis/helpers.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/analysis/resource.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/app.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/cli.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/config_default.toml +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/context.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/check_resources.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/helpers.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/process_check_data.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/db/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/db/check.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/db/resource.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/db/resource_exception.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/logger.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/csv/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/csv/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/csv/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/csv/20240827_add_indexes_column_to_tables_index_table.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20221205_initial_up_rev1.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20221206_rev1_up_rev2.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20221206_rev2_up_rev3.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20221208_rev3_up_rev4.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20221208_rev4_up_rev5.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230119_rev5_up_rev6.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230121_rev6_up_rev7.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230121_rev7_up_rev8.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230130_drop_migrations.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230206_datetime_aware.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230515_rev8_up_rev9.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20230606_rev9_up_rev10.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20231102_drop_csv_analysis.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20240827_add_resources_exceptions_table.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20240926_add_indexes.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20241004_add_comment_column_to_resources_exceptions.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/migrations/main/20241021_add_parquet_columns.sql +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/checks.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/resources.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/resources_exceptions.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/resources_legacy.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/status.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/schemas/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/schemas/check.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/schemas/resource.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/schemas/resource_exception.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/__init__.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/auth.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/csv.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/db.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/file.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/http.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/minio.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/parquet.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/queue.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/reader.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/utils/timer.py +0 -0
- {udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/worker.py +0 -0
|
@@ -30,8 +30,8 @@ async def select_rows_based_on_query(connection, q: str, *args) -> list[Record]:
|
|
|
30
30
|
async def select_batch_resources_to_check() -> list[Record]:
|
|
31
31
|
"""Select a batch of resources to check from the catalog
|
|
32
32
|
- It first selects resources with priority=True
|
|
33
|
-
- ...then resources without
|
|
34
|
-
- and if the total number of selected resources is still less than the batch size, it will also add resources with outdated
|
|
33
|
+
- ...then resources without last check
|
|
34
|
+
- and if the total number of selected resources is still less than the batch size, it also adds resources whose last check is outdated to the batch
|
|
35
35
|
"""
|
|
36
36
|
context.monitor().set_status("Getting a batch from catalog...")
|
|
37
37
|
|
|
@@ -39,7 +39,7 @@ async def select_batch_resources_to_check() -> list[Record]:
|
|
|
39
39
|
async with pool.acquire() as connection:
|
|
40
40
|
excluded = Resource.get_excluded_clause()
|
|
41
41
|
|
|
42
|
-
# first
|
|
42
|
+
# first resources that are prioritised
|
|
43
43
|
q = f"""
|
|
44
44
|
SELECT * FROM (
|
|
45
45
|
SELECT catalog.url, dataset_id, resource_id
|
|
@@ -51,7 +51,8 @@ async def select_batch_resources_to_check() -> list[Record]:
|
|
|
51
51
|
"""
|
|
52
52
|
to_check: list[Record] = await select_rows_based_on_query(connection, q)
|
|
53
53
|
|
|
54
|
-
# then
|
|
54
|
+
# then resources with no last check
|
|
55
|
+
# (either because they have never been checked before, or because the last check has been deleted)
|
|
55
56
|
if len(to_check) < config.BATCH_SIZE:
|
|
56
57
|
q = f"""
|
|
57
58
|
SELECT * FROM (
|
|
@@ -65,7 +66,7 @@ async def select_batch_resources_to_check() -> list[Record]:
|
|
|
65
66
|
"""
|
|
66
67
|
to_check += await select_rows_based_on_query(connection, q)
|
|
67
68
|
|
|
68
|
-
# if not enough for our batch size, handle outdated
|
|
69
|
+
# if not enough for our batch size, handle resources with outdated last check
|
|
69
70
|
if len(to_check) < config.BATCH_SIZE:
|
|
70
71
|
since = parse_timespan(config.SINCE) # in seconds
|
|
71
72
|
since = datetime.now(timezone.utc) - timedelta(seconds=since)
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
-- Change the column `last_check` of the `catalog` table to be a foreign key to the `checks` table.
|
|
2
|
+
-- (assuming `last_check` is already an INT)
|
|
3
|
+
-- When a check is deleted, the `last_check` column of the `catalog` table will be set to NULL.
|
|
4
|
+
|
|
5
|
+
ALTER TABLE catalog
|
|
6
|
+
ADD CONSTRAINT fk_last_check
|
|
7
|
+
FOREIGN KEY (last_check) REFERENCES checks(id)
|
|
8
|
+
ON DELETE SET NULL;
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/check_resources.py
RENAMED
|
File without changes
|
|
File without changes
|
{udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/crawl/process_check_data.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/db/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/resources_exceptions.py
RENAMED
|
File without changes
|
{udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/routes/resources_legacy.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{udata_hydra-2.0.4.dev5074 → udata_hydra-2.0.4.dev5083}/udata_hydra/schemas/resource_exception.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|