zombie-squirrel 0.7.3__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zombie_squirrel-0.7.3/src/zombie_squirrel.egg-info → zombie_squirrel-0.8.0}/PKG-INFO +5 -5
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/README.md +3 -3
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/pyproject.toml +1 -1
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel/__init__.py +2 -2
- zombie_squirrel-0.7.3/src/zombie_squirrel/squirrels.py → zombie_squirrel-0.8.0/src/zombie_squirrel/acorns.py +35 -39
- zombie_squirrel-0.7.3/src/zombie_squirrel/acorns.py → zombie_squirrel-0.8.0/src/zombie_squirrel/forest.py +4 -4
- zombie_squirrel-0.8.0/src/zombie_squirrel/sync.py +18 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0/src/zombie_squirrel.egg-info}/PKG-INFO +5 -5
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel.egg-info/SOURCES.txt +2 -2
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel.egg-info/requires.txt +1 -1
- zombie_squirrel-0.7.3/tests/test_squirrels.py → zombie_squirrel-0.8.0/tests/test_acorns.py +146 -76
- zombie_squirrel-0.8.0/tests/test_sync.py +85 -0
- zombie_squirrel-0.7.3/tests/test_acorns.py → zombie_squirrel-0.8.0/tests/test_trees.py +51 -51
- zombie_squirrel-0.7.3/src/zombie_squirrel/sync.py +0 -18
- zombie_squirrel-0.7.3/tests/test_sync.py +0 -85
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/LICENSE +0 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/setup.cfg +0 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/setup.py +0 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel/utils.py +0 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel.egg-info/dependency_links.txt +0 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel.egg-info/top_level.txt +0 -0
- {zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/tests/test_utils.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zombie-squirrel
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Generated from aind-library-template
|
|
5
5
|
Author: Allen Institute for Neural Dynamics
|
|
6
6
|
License: MIT
|
|
@@ -9,7 +9,7 @@ Requires-Python: >=3.10
|
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
|
11
11
|
Requires-Dist: duckdb
|
|
12
|
-
Requires-Dist: fastparquet
|
|
12
|
+
Requires-Dist: fastparquet<2025
|
|
13
13
|
Requires-Dist: boto3
|
|
14
14
|
Requires-Dist: pandas
|
|
15
15
|
Requires-Dist: aind-data-access-api[docdb]
|
|
@@ -21,7 +21,7 @@ Dynamic: license-file
|
|
|
21
21
|

|
|
22
22
|
[](https://github.com/semantic-release/semantic-release)
|
|
23
23
|

|
|
24
|
-

|
|
25
25
|

|
|
26
26
|
|
|
27
27
|
<img src="zombie-squirrel_logo.png" width="400" alt="Logo (image from ChatGPT)">
|
|
@@ -37,10 +37,10 @@ pip install zombie-squirrel
|
|
|
37
37
|
### Set backend
|
|
38
38
|
|
|
39
39
|
```bash
|
|
40
|
-
export
|
|
40
|
+
export FOREST_TYPE='S3'
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
-
Options are '
|
|
43
|
+
Options are 'S3', 'MEMORY'.
|
|
44
44
|
|
|
45
45
|
### Scurry (fetch) data
|
|
46
46
|
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|

|
|
5
5
|
[](https://github.com/semantic-release/semantic-release)
|
|
6
6
|

|
|
7
|
-

|
|
8
8
|

|
|
9
9
|
|
|
10
10
|
<img src="zombie-squirrel_logo.png" width="400" alt="Logo (image from ChatGPT)">
|
|
@@ -20,10 +20,10 @@ pip install zombie-squirrel
|
|
|
20
20
|
### Set backend
|
|
21
21
|
|
|
22
22
|
```bash
|
|
23
|
-
export
|
|
23
|
+
export FOREST_TYPE='S3'
|
|
24
24
|
```
|
|
25
25
|
|
|
26
|
-
Options are '
|
|
26
|
+
Options are 'S3', 'MEMORY'.
|
|
27
27
|
|
|
28
28
|
### Scurry (fetch) data
|
|
29
29
|
|
|
@@ -3,9 +3,9 @@
|
|
|
3
3
|
Provides functions to fetch and cache project names, subject IDs, and asset
|
|
4
4
|
metadata from the AIND metadata database with support for multiple backends."""
|
|
5
5
|
|
|
6
|
-
__version__ = "0.7.3"
|
|
6
|
+
__version__ = "0.8.0"
|
|
7
7
|
|
|
8
|
-
from zombie_squirrel.
|
|
8
|
+
from zombie_squirrel.acorns import ( # noqa: F401
|
|
9
9
|
asset_basics,
|
|
10
10
|
raw_to_derived,
|
|
11
11
|
source_data,
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
"""
|
|
1
|
+
"""Acorns: functions to fetch and cache data from MongoDB."""
|
|
2
2
|
|
|
3
3
|
import logging
|
|
4
4
|
import os
|
|
@@ -8,41 +8,41 @@ from typing import Any
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
from aind_data_access_api.document_db import MetadataDbClient
|
|
10
10
|
|
|
11
|
-
from zombie_squirrel.
|
|
12
|
-
|
|
13
|
-
|
|
11
|
+
from zombie_squirrel.forest import (
|
|
12
|
+
MemoryTree,
|
|
13
|
+
S3Tree,
|
|
14
14
|
)
|
|
15
15
|
|
|
16
16
|
# --- Backend setup ---------------------------------------------------
|
|
17
17
|
|
|
18
18
|
API_GATEWAY_HOST = "api.allenneuraldynamics.org"
|
|
19
19
|
|
|
20
|
-
|
|
20
|
+
forest_type = os.getenv("FOREST_TYPE", "memory").lower()
|
|
21
21
|
|
|
22
|
-
if
|
|
23
|
-
logging.info("Using S3
|
|
24
|
-
|
|
22
|
+
if forest_type == "S3": # pragma: no cover
|
|
23
|
+
logging.info("Using S3 forest for caching")
|
|
24
|
+
TREE = S3Tree()
|
|
25
25
|
else:
|
|
26
|
-
logging.info("Using in-memory
|
|
27
|
-
|
|
26
|
+
logging.info("Using in-memory forest for caching")
|
|
27
|
+
TREE = MemoryTree()
|
|
28
28
|
|
|
29
|
-
# ---
|
|
29
|
+
# --- Acorn registry -----------------------------------------------------
|
|
30
30
|
|
|
31
|
-
|
|
31
|
+
ACORN_REGISTRY: dict[str, Callable[[], Any]] = {}
|
|
32
32
|
|
|
33
33
|
|
|
34
|
-
def
|
|
35
|
-
"""Decorator for registering new
|
|
34
|
+
def register_acorn(name: str):
|
|
35
|
+
"""Decorator for registering new acorns."""
|
|
36
36
|
|
|
37
37
|
def decorator(func):
|
|
38
|
-
"""Register function in
|
|
39
|
-
|
|
38
|
+
"""Register function in acorn registry."""
|
|
39
|
+
ACORN_REGISTRY[name] = func
|
|
40
40
|
return func
|
|
41
41
|
|
|
42
42
|
return decorator
|
|
43
43
|
|
|
44
44
|
|
|
45
|
-
# ---
|
|
45
|
+
# --- Acorns -----------------------------------------------------
|
|
46
46
|
|
|
47
47
|
NAMES = {
|
|
48
48
|
"upn": "unique_project_names",
|
|
@@ -53,7 +53,7 @@ NAMES = {
|
|
|
53
53
|
}
|
|
54
54
|
|
|
55
55
|
|
|
56
|
-
@
|
|
56
|
+
@register_acorn(NAMES["upn"])
|
|
57
57
|
def unique_project_names(force_update: bool = False) -> list[str]:
|
|
58
58
|
"""Fetch unique project names from metadata database.
|
|
59
59
|
|
|
@@ -65,7 +65,7 @@ def unique_project_names(force_update: bool = False) -> list[str]:
|
|
|
65
65
|
|
|
66
66
|
Returns:
|
|
67
67
|
List of unique project names."""
|
|
68
|
-
df =
|
|
68
|
+
df = TREE.scurry(NAMES["upn"])
|
|
69
69
|
|
|
70
70
|
if df.empty or force_update:
|
|
71
71
|
# If cache is missing, fetch data
|
|
@@ -81,12 +81,12 @@ def unique_project_names(force_update: bool = False) -> list[str]:
|
|
|
81
81
|
]
|
|
82
82
|
)
|
|
83
83
|
df = pd.DataFrame(unique_project_names)
|
|
84
|
-
|
|
84
|
+
TREE.hide(NAMES["upn"], df)
|
|
85
85
|
|
|
86
86
|
return df["project_name"].tolist()
|
|
87
87
|
|
|
88
88
|
|
|
89
|
-
@
|
|
89
|
+
@register_acorn(NAMES["usi"])
|
|
90
90
|
def unique_subject_ids(force_update: bool = False) -> list[str]:
|
|
91
91
|
"""Fetch unique subject IDs from metadata database.
|
|
92
92
|
|
|
@@ -98,7 +98,7 @@ def unique_subject_ids(force_update: bool = False) -> list[str]:
|
|
|
98
98
|
|
|
99
99
|
Returns:
|
|
100
100
|
List of unique subject IDs."""
|
|
101
|
-
df =
|
|
101
|
+
df = TREE.scurry(NAMES["usi"])
|
|
102
102
|
|
|
103
103
|
if df.empty or force_update:
|
|
104
104
|
# If cache is missing, fetch data
|
|
@@ -114,12 +114,12 @@ def unique_subject_ids(force_update: bool = False) -> list[str]:
|
|
|
114
114
|
]
|
|
115
115
|
)
|
|
116
116
|
df = pd.DataFrame(unique_subject_ids)
|
|
117
|
-
|
|
117
|
+
TREE.hide(NAMES["usi"], df)
|
|
118
118
|
|
|
119
119
|
return df["subject_id"].tolist()
|
|
120
120
|
|
|
121
121
|
|
|
122
|
-
@
|
|
122
|
+
@register_acorn(NAMES["basics"])
|
|
123
123
|
def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
124
124
|
"""Fetch basic asset metadata including modalities, projects, and subject info.
|
|
125
125
|
|
|
@@ -133,7 +133,7 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
133
133
|
|
|
134
134
|
Returns:
|
|
135
135
|
DataFrame with basic asset metadata."""
|
|
136
|
-
df =
|
|
136
|
+
df = TREE.scurry(NAMES["basics"])
|
|
137
137
|
|
|
138
138
|
FIELDS = [
|
|
139
139
|
"data_description.modalities",
|
|
@@ -146,7 +146,6 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
146
146
|
"subject.subject_details.genotype",
|
|
147
147
|
"other_identifiers",
|
|
148
148
|
"location",
|
|
149
|
-
"name",
|
|
150
149
|
]
|
|
151
150
|
|
|
152
151
|
if df.empty or force_update:
|
|
@@ -165,7 +164,6 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
165
164
|
"process_date",
|
|
166
165
|
"genotype",
|
|
167
166
|
"location",
|
|
168
|
-
"name",
|
|
169
167
|
]
|
|
170
168
|
)
|
|
171
169
|
client = MetadataDbClient(
|
|
@@ -218,11 +216,10 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
218
216
|
|
|
219
217
|
# Get the CO asset ID
|
|
220
218
|
other_identifiers = record.get("other_identifiers", {})
|
|
221
|
-
code_ocean = None
|
|
222
219
|
if other_identifiers:
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
220
|
+
code_ocean = other_identifiers.get("Code Ocean", None)
|
|
221
|
+
else:
|
|
222
|
+
code_ocean = None
|
|
226
223
|
|
|
227
224
|
flat_record = {
|
|
228
225
|
"_id": record["_id"],
|
|
@@ -237,7 +234,6 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
237
234
|
"process_date": process_date,
|
|
238
235
|
"genotype": record.get("subject", {}).get("subject_details", {}).get("genotype", None),
|
|
239
236
|
"location": record.get("location", None),
|
|
240
|
-
"name": record.get("name", None),
|
|
241
237
|
}
|
|
242
238
|
records.append(flat_record)
|
|
243
239
|
|
|
@@ -245,12 +241,12 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
245
241
|
new_df = pd.DataFrame(records)
|
|
246
242
|
df = pd.concat([df[~df["_id"].isin(keep_ids)], new_df], ignore_index=True)
|
|
247
243
|
|
|
248
|
-
|
|
244
|
+
TREE.hide(NAMES["basics"], df)
|
|
249
245
|
|
|
250
246
|
return df
|
|
251
247
|
|
|
252
248
|
|
|
253
|
-
@
|
|
249
|
+
@register_acorn(NAMES["d2r"])
|
|
254
250
|
def source_data(force_update: bool = False) -> pd.DataFrame:
|
|
255
251
|
"""Fetch source data references for derived records.
|
|
256
252
|
|
|
@@ -262,7 +258,7 @@ def source_data(force_update: bool = False) -> pd.DataFrame:
|
|
|
262
258
|
|
|
263
259
|
Returns:
|
|
264
260
|
DataFrame with _id and source_data columns."""
|
|
265
|
-
df =
|
|
261
|
+
df = TREE.scurry(NAMES["d2r"])
|
|
266
262
|
|
|
267
263
|
if df.empty or force_update:
|
|
268
264
|
logging.info("Updating cache for source data")
|
|
@@ -287,12 +283,12 @@ def source_data(force_update: bool = False) -> pd.DataFrame:
|
|
|
287
283
|
)
|
|
288
284
|
|
|
289
285
|
df = pd.DataFrame(data)
|
|
290
|
-
|
|
286
|
+
TREE.hide(NAMES["d2r"], df)
|
|
291
287
|
|
|
292
288
|
return df
|
|
293
289
|
|
|
294
290
|
|
|
295
|
-
@
|
|
291
|
+
@register_acorn(NAMES["r2d"])
|
|
296
292
|
def raw_to_derived(force_update: bool = False) -> pd.DataFrame:
|
|
297
293
|
"""Fetch mapping of raw records to their derived records.
|
|
298
294
|
|
|
@@ -304,7 +300,7 @@ def raw_to_derived(force_update: bool = False) -> pd.DataFrame:
|
|
|
304
300
|
|
|
305
301
|
Returns:
|
|
306
302
|
DataFrame with _id and derived_records columns."""
|
|
307
|
-
df =
|
|
303
|
+
df = TREE.scurry(NAMES["r2d"])
|
|
308
304
|
|
|
309
305
|
if df.empty or force_update:
|
|
310
306
|
logging.info("Updating cache for raw to derived mapping")
|
|
@@ -350,6 +346,6 @@ def raw_to_derived(force_update: bool = False) -> pd.DataFrame:
|
|
|
350
346
|
)
|
|
351
347
|
|
|
352
348
|
df = pd.DataFrame(data)
|
|
353
|
-
|
|
349
|
+
TREE.hide(NAMES["r2d"], df)
|
|
354
350
|
|
|
355
351
|
return df
|
|
@@ -11,11 +11,11 @@ import pandas as pd
|
|
|
11
11
|
from zombie_squirrel.utils import get_s3_cache_path, prefix_table_name
|
|
12
12
|
|
|
13
13
|
|
|
14
|
-
class
|
|
14
|
+
class Tree(ABC):
|
|
15
15
|
"""Base class for a storage backend (the cache)."""
|
|
16
16
|
|
|
17
17
|
def __init__(self) -> None:
|
|
18
|
-
"""Initialize the
|
|
18
|
+
"""Initialize the Tree."""
|
|
19
19
|
super().__init__()
|
|
20
20
|
|
|
21
21
|
@abstractmethod
|
|
@@ -29,7 +29,7 @@ class Acorn(ABC):
|
|
|
29
29
|
pass # pragma: no cover
|
|
30
30
|
|
|
31
31
|
|
|
32
|
-
class
|
|
32
|
+
class S3Tree(Tree):
|
|
33
33
|
"""Stores and retrieves caches using AWS S3 with parquet files."""
|
|
34
34
|
|
|
35
35
|
def __init__(self) -> None:
|
|
@@ -79,7 +79,7 @@ class S3Acorn(Acorn):
|
|
|
79
79
|
return pd.DataFrame()
|
|
80
80
|
|
|
81
81
|
|
|
82
|
-
class
|
|
82
|
+
class MemoryTree(Tree):
|
|
83
83
|
"""A simple in-memory backend for testing or local development."""
|
|
84
84
|
|
|
85
85
|
def __init__(self) -> None:
|
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
"""Synchronization utilities for updating all cached data."""
|
|
2
|
+
|
|
3
|
+
import logging
|
|
4
|
+
|
|
5
|
+
from .acorns import ACORN_REGISTRY
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def hide_acorns():
|
|
9
|
+
"""Trigger force update of all registered acorn functions.
|
|
10
|
+
|
|
11
|
+
Calls each acorn function with force_update=True to refresh
|
|
12
|
+
all cached data in the tree backend."""
|
|
13
|
+
logging.basicConfig(
|
|
14
|
+
level=logging.INFO,
|
|
15
|
+
format="%(asctime)s %(levelname)s %(message)s"
|
|
16
|
+
)
|
|
17
|
+
for acorn in ACORN_REGISTRY.values():
|
|
18
|
+
acorn(force_update=True)
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zombie-squirrel
|
|
3
|
-
Version: 0.7.3
|
|
3
|
+
Version: 0.8.0
|
|
4
4
|
Summary: Generated from aind-library-template
|
|
5
5
|
Author: Allen Institute for Neural Dynamics
|
|
6
6
|
License: MIT
|
|
@@ -9,7 +9,7 @@ Requires-Python: >=3.10
|
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
|
11
11
|
Requires-Dist: duckdb
|
|
12
|
-
Requires-Dist: fastparquet
|
|
12
|
+
Requires-Dist: fastparquet<2025
|
|
13
13
|
Requires-Dist: boto3
|
|
14
14
|
Requires-Dist: pandas
|
|
15
15
|
Requires-Dist: aind-data-access-api[docdb]
|
|
@@ -21,7 +21,7 @@ Dynamic: license-file
|
|
|
21
21
|

|
|
22
22
|
[](https://github.com/semantic-release/semantic-release)
|
|
23
23
|

|
|
24
|
-

|
|
25
25
|

|
|
26
26
|
|
|
27
27
|
<img src="zombie-squirrel_logo.png" width="400" alt="Logo (image from ChatGPT)">
|
|
@@ -37,10 +37,10 @@ pip install zombie-squirrel
|
|
|
37
37
|
### Set backend
|
|
38
38
|
|
|
39
39
|
```bash
|
|
40
|
-
export
|
|
40
|
+
export FOREST_TYPE='S3'
|
|
41
41
|
```
|
|
42
42
|
|
|
43
|
-
Options are '
|
|
43
|
+
Options are 'S3', 'MEMORY'.
|
|
44
44
|
|
|
45
45
|
### Scurry (fetch) data
|
|
46
46
|
|
|
@@ -4,7 +4,7 @@ pyproject.toml
|
|
|
4
4
|
setup.py
|
|
5
5
|
src/zombie_squirrel/__init__.py
|
|
6
6
|
src/zombie_squirrel/acorns.py
|
|
7
|
-
src/zombie_squirrel/
|
|
7
|
+
src/zombie_squirrel/forest.py
|
|
8
8
|
src/zombie_squirrel/sync.py
|
|
9
9
|
src/zombie_squirrel/utils.py
|
|
10
10
|
src/zombie_squirrel.egg-info/PKG-INFO
|
|
@@ -13,6 +13,6 @@ src/zombie_squirrel.egg-info/dependency_links.txt
|
|
|
13
13
|
src/zombie_squirrel.egg-info/requires.txt
|
|
14
14
|
src/zombie_squirrel.egg-info/top_level.txt
|
|
15
15
|
tests/test_acorns.py
|
|
16
|
-
tests/test_squirrels.py
|
|
17
16
|
tests/test_sync.py
|
|
17
|
+
tests/test_trees.py
|
|
18
18
|
tests/test_utils.py
|
|
@@ -1,16 +1,16 @@
|
|
|
1
|
-
"""Unit tests for zombie_squirrel.
|
|
1
|
+
"""Unit tests for zombie_squirrel.acorns module.
|
|
2
2
|
|
|
3
|
-
Tests for
|
|
3
|
+
Tests for acorn functions, caching, and registry mechanism."""
|
|
4
4
|
|
|
5
5
|
import unittest
|
|
6
6
|
from unittest.mock import MagicMock, patch
|
|
7
7
|
|
|
8
8
|
import pandas as pd
|
|
9
9
|
|
|
10
|
-
from zombie_squirrel.
|
|
11
|
-
from zombie_squirrel.
|
|
10
|
+
from zombie_squirrel.forest import MemoryTree
|
|
11
|
+
from zombie_squirrel.acorns import (
|
|
12
12
|
NAMES,
|
|
13
|
-
|
|
13
|
+
ACORN_REGISTRY,
|
|
14
14
|
asset_basics,
|
|
15
15
|
raw_to_derived,
|
|
16
16
|
source_data,
|
|
@@ -19,20 +19,20 @@ from zombie_squirrel.squirrels import (
|
|
|
19
19
|
)
|
|
20
20
|
|
|
21
21
|
|
|
22
|
-
class
|
|
23
|
-
"""Tests for
|
|
22
|
+
class TestAcornRegistration(unittest.TestCase):
|
|
23
|
+
"""Tests for acorn registration mechanism."""
|
|
24
24
|
|
|
25
|
-
def
|
|
26
|
-
"""Test that all
|
|
27
|
-
self.assertIn(NAMES["upn"],
|
|
28
|
-
self.assertIn(NAMES["usi"],
|
|
29
|
-
self.assertIn(NAMES["basics"],
|
|
30
|
-
self.assertIn(NAMES["d2r"],
|
|
31
|
-
self.assertIn(NAMES["r2d"],
|
|
25
|
+
def test_acorn_registry_contains_all_functions(self):
|
|
26
|
+
"""Test that all acorn functions are registered."""
|
|
27
|
+
self.assertIn(NAMES["upn"], ACORN_REGISTRY)
|
|
28
|
+
self.assertIn(NAMES["usi"], ACORN_REGISTRY)
|
|
29
|
+
self.assertIn(NAMES["basics"], ACORN_REGISTRY)
|
|
30
|
+
self.assertIn(NAMES["d2r"], ACORN_REGISTRY)
|
|
31
|
+
self.assertIn(NAMES["r2d"], ACORN_REGISTRY)
|
|
32
32
|
|
|
33
33
|
def test_registry_values_are_callable(self):
|
|
34
34
|
"""Test that registry values are callable functions."""
|
|
35
|
-
for name, func in
|
|
35
|
+
for name, func in ACORN_REGISTRY.items():
|
|
36
36
|
self.assertTrue(callable(func), f"{name} is not callable")
|
|
37
37
|
|
|
38
38
|
def test_names_dict_completeness(self):
|
|
@@ -43,23 +43,23 @@ class TestSquirrelRegistration(unittest.TestCase):
|
|
|
43
43
|
|
|
44
44
|
|
|
45
45
|
class TestUniqueProjectNames(unittest.TestCase):
|
|
46
|
-
"""Tests for unique_project_names
|
|
46
|
+
"""Tests for unique_project_names acorn."""
|
|
47
47
|
|
|
48
|
-
@patch("zombie_squirrel.
|
|
49
|
-
@patch("zombie_squirrel.
|
|
50
|
-
def test_unique_project_names_cache_hit(self, mock_client_class,
|
|
48
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
49
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
50
|
+
def test_unique_project_names_cache_hit(self, mock_client_class, mock_tree):
|
|
51
51
|
"""Test returning cached project names."""
|
|
52
52
|
cached_df = pd.DataFrame({"project_name": ["proj1", "proj2", "proj3"]})
|
|
53
|
-
|
|
53
|
+
mock_tree.hide(NAMES["upn"], cached_df)
|
|
54
54
|
|
|
55
55
|
result = unique_project_names()
|
|
56
56
|
|
|
57
57
|
self.assertEqual(result, ["proj1", "proj2", "proj3"])
|
|
58
58
|
mock_client_class.assert_not_called()
|
|
59
59
|
|
|
60
|
-
@patch("zombie_squirrel.
|
|
61
|
-
@patch("zombie_squirrel.
|
|
62
|
-
def test_unique_project_names_cache_miss(self, mock_client_class,
|
|
60
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
61
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
62
|
+
def test_unique_project_names_cache_miss(self, mock_client_class, mock_tree):
|
|
63
63
|
"""Test fetching project names when cache is empty."""
|
|
64
64
|
mock_client_instance = MagicMock()
|
|
65
65
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -74,12 +74,12 @@ class TestUniqueProjectNames(unittest.TestCase):
|
|
|
74
74
|
mock_client_class.assert_called_once()
|
|
75
75
|
mock_client_instance.aggregate_docdb_records.assert_called_once()
|
|
76
76
|
|
|
77
|
-
@patch("zombie_squirrel.
|
|
78
|
-
@patch("zombie_squirrel.
|
|
79
|
-
def test_unique_project_names_force_update(self, mock_client_class,
|
|
77
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
78
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
79
|
+
def test_unique_project_names_force_update(self, mock_client_class, mock_tree):
|
|
80
80
|
"""Test force_update bypasses cache."""
|
|
81
81
|
cached_df = pd.DataFrame({"project_name": ["old_proj"]})
|
|
82
|
-
|
|
82
|
+
mock_tree.hide(NAMES["upn"], cached_df)
|
|
83
83
|
|
|
84
84
|
mock_client_instance = MagicMock()
|
|
85
85
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -94,21 +94,21 @@ class TestUniqueProjectNames(unittest.TestCase):
|
|
|
94
94
|
class TestUniqueSubjectIds(unittest.TestCase):
|
|
95
95
|
"""Tests for unique_subject_ids squirrel."""
|
|
96
96
|
|
|
97
|
-
@patch("zombie_squirrel.
|
|
98
|
-
@patch("zombie_squirrel.
|
|
99
|
-
def test_unique_subject_ids_cache_hit(self, mock_client_class,
|
|
97
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
98
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
99
|
+
def test_unique_subject_ids_cache_hit(self, mock_client_class, mock_tree):
|
|
100
100
|
"""Test returning cached subject IDs."""
|
|
101
101
|
cached_df = pd.DataFrame({"subject_id": ["sub001", "sub002"]})
|
|
102
|
-
|
|
102
|
+
mock_tree.hide(NAMES["usi"], cached_df)
|
|
103
103
|
|
|
104
104
|
result = unique_subject_ids()
|
|
105
105
|
|
|
106
106
|
self.assertEqual(result, ["sub001", "sub002"])
|
|
107
107
|
mock_client_class.assert_not_called()
|
|
108
108
|
|
|
109
|
-
@patch("zombie_squirrel.
|
|
110
|
-
@patch("zombie_squirrel.
|
|
111
|
-
def test_unique_subject_ids_cache_miss(self, mock_client_class,
|
|
109
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
110
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
111
|
+
def test_unique_subject_ids_cache_miss(self, mock_client_class, mock_tree):
|
|
112
112
|
"""Test fetching subject IDs when cache is empty."""
|
|
113
113
|
mock_client_instance = MagicMock()
|
|
114
114
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -122,12 +122,12 @@ class TestUniqueSubjectIds(unittest.TestCase):
|
|
|
122
122
|
self.assertEqual(result, ["sub001", "sub002"])
|
|
123
123
|
mock_client_class.assert_called_once()
|
|
124
124
|
|
|
125
|
-
@patch("zombie_squirrel.
|
|
126
|
-
@patch("zombie_squirrel.
|
|
127
|
-
def test_unique_subject_ids_force_update(self, mock_client_class,
|
|
125
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
126
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
127
|
+
def test_unique_subject_ids_force_update(self, mock_client_class, mock_tree):
|
|
128
128
|
"""Test force_update bypasses cache."""
|
|
129
129
|
cached_df = pd.DataFrame({"subject_id": ["old_sub"]})
|
|
130
|
-
|
|
130
|
+
mock_tree.hide(NAMES["usi"], cached_df)
|
|
131
131
|
|
|
132
132
|
mock_client_instance = MagicMock()
|
|
133
133
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -141,9 +141,9 @@ class TestUniqueSubjectIds(unittest.TestCase):
|
|
|
141
141
|
class TestAssetBasics(unittest.TestCase):
|
|
142
142
|
"""Tests for asset_basics squirrel."""
|
|
143
143
|
|
|
144
|
-
@patch("zombie_squirrel.
|
|
145
|
-
@patch("zombie_squirrel.
|
|
146
|
-
def test_asset_basics_cache_hit(self, mock_client_class,
|
|
144
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
145
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
146
|
+
def test_asset_basics_cache_hit(self, mock_client_class, mock_tree):
|
|
147
147
|
"""Test returning cached asset basics."""
|
|
148
148
|
cached_df = pd.DataFrame(
|
|
149
149
|
{
|
|
@@ -163,7 +163,7 @@ class TestAssetBasics(unittest.TestCase):
|
|
|
163
163
|
],
|
|
164
164
|
}
|
|
165
165
|
)
|
|
166
|
-
|
|
166
|
+
mock_tree.hide(NAMES["basics"], cached_df)
|
|
167
167
|
|
|
168
168
|
result = asset_basics()
|
|
169
169
|
|
|
@@ -171,9 +171,9 @@ class TestAssetBasics(unittest.TestCase):
|
|
|
171
171
|
self.assertListEqual(list(result["_id"]), ["id1", "id2"])
|
|
172
172
|
mock_client_class.assert_not_called()
|
|
173
173
|
|
|
174
|
-
@patch("zombie_squirrel.
|
|
175
|
-
@patch("zombie_squirrel.
|
|
176
|
-
def test_asset_basics_cache_miss(self, mock_client_class,
|
|
174
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
175
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
176
|
+
def test_asset_basics_cache_miss(self, mock_client_class, mock_tree):
|
|
177
177
|
"""Test fetching asset basics when cache is empty."""
|
|
178
178
|
mock_client_instance = MagicMock()
|
|
179
179
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -202,10 +202,10 @@ class TestAssetBasics(unittest.TestCase):
|
|
|
202
202
|
self.assertEqual(result.iloc[0]["modalities"], "img")
|
|
203
203
|
self.assertEqual(result.iloc[0]["project_name"], "proj1")
|
|
204
204
|
|
|
205
|
-
@patch("zombie_squirrel.
|
|
206
|
-
@patch("zombie_squirrel.
|
|
205
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
206
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
207
207
|
def test_asset_basics_with_data_processes(
|
|
208
|
-
self, mock_client_class,
|
|
208
|
+
self, mock_client_class, mock_tree
|
|
209
209
|
):
|
|
210
210
|
"""Test asset_basics includes process_date from data_processes."""
|
|
211
211
|
mock_client_instance = MagicMock()
|
|
@@ -240,10 +240,10 @@ class TestAssetBasics(unittest.TestCase):
|
|
|
240
240
|
self.assertEqual(result.iloc[0]["_id"], "id1")
|
|
241
241
|
self.assertEqual(result.iloc[0]["process_date"], "2023-01-20")
|
|
242
242
|
|
|
243
|
-
@patch("zombie_squirrel.
|
|
244
|
-
@patch("zombie_squirrel.
|
|
243
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
244
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
245
245
|
def test_asset_basics_incremental_update(
|
|
246
|
-
self, mock_client_class,
|
|
246
|
+
self, mock_client_class, mock_tree
|
|
247
247
|
):
|
|
248
248
|
"""Test incremental cache update with partial data refresh."""
|
|
249
249
|
mock_client_instance = MagicMock()
|
|
@@ -277,13 +277,83 @@ class TestAssetBasics(unittest.TestCase):
|
|
|
277
277
|
self.assertEqual(len(result), 1)
|
|
278
278
|
self.assertEqual(result.iloc[0]["_id"], "id2")
|
|
279
279
|
|
|
280
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
281
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
282
|
+
def test_asset_basics_with_other_identifiers_no_code_ocean(
|
|
283
|
+
self, mock_client_class, mock_tree
|
|
284
|
+
):
|
|
285
|
+
"""Test asset_basics when other_identifiers exists but has no Code Ocean."""
|
|
286
|
+
mock_client_instance = MagicMock()
|
|
287
|
+
mock_client_class.return_value = mock_client_instance
|
|
288
|
+
|
|
289
|
+
mock_client_instance.retrieve_docdb_records.return_value = [
|
|
290
|
+
{
|
|
291
|
+
"_id": "id1",
|
|
292
|
+
"_last_modified": "2023-01-01",
|
|
293
|
+
"data_description": {
|
|
294
|
+
"modalities": [{"abbreviation": "img"}],
|
|
295
|
+
"project_name": "proj1",
|
|
296
|
+
"data_level": "raw",
|
|
297
|
+
},
|
|
298
|
+
"subject": {"subject_id": "sub001"},
|
|
299
|
+
"acquisition": {
|
|
300
|
+
"acquisition_start_time": "2023-01-01T10:00:00",
|
|
301
|
+
"acquisition_end_time": "2023-01-01T11:00:00",
|
|
302
|
+
},
|
|
303
|
+
"other_identifiers": {"Some Other Field": "value123"},
|
|
304
|
+
}
|
|
305
|
+
]
|
|
306
|
+
|
|
307
|
+
result = asset_basics()
|
|
308
|
+
|
|
309
|
+
self.assertEqual(len(result), 1)
|
|
310
|
+
self.assertEqual(result.iloc[0]["_id"], "id1")
|
|
311
|
+
self.assertIsNone(result.iloc[0]["code_ocean"])
|
|
312
|
+
|
|
313
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
314
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
315
|
+
def test_asset_basics_with_code_ocean_identifier(
|
|
316
|
+
self, mock_client_class, mock_tree
|
|
317
|
+
):
|
|
318
|
+
"""Test asset_basics when other_identifiers contains Code Ocean."""
|
|
319
|
+
mock_client_instance = MagicMock()
|
|
320
|
+
mock_client_class.return_value = mock_client_instance
|
|
321
|
+
|
|
322
|
+
mock_client_instance.retrieve_docdb_records.return_value = [
|
|
323
|
+
{
|
|
324
|
+
"_id": "id1",
|
|
325
|
+
"_last_modified": "2023-01-01",
|
|
326
|
+
"data_description": {
|
|
327
|
+
"modalities": [{"abbreviation": "img"}],
|
|
328
|
+
"project_name": "proj1",
|
|
329
|
+
"data_level": "raw",
|
|
330
|
+
},
|
|
331
|
+
"subject": {"subject_id": "sub001"},
|
|
332
|
+
"acquisition": {
|
|
333
|
+
"acquisition_start_time": "2023-01-01T10:00:00",
|
|
334
|
+
"acquisition_end_time": "2023-01-01T11:00:00",
|
|
335
|
+
},
|
|
336
|
+
"other_identifiers": {
|
|
337
|
+
"Code Ocean": ["df429003-91a0-45d2-8457-66b156ad8bfa"]
|
|
338
|
+
},
|
|
339
|
+
}
|
|
340
|
+
]
|
|
341
|
+
|
|
342
|
+
result = asset_basics()
|
|
343
|
+
|
|
344
|
+
self.assertEqual(len(result), 1)
|
|
345
|
+
self.assertEqual(result.iloc[0]["_id"], "id1")
|
|
346
|
+
self.assertEqual(
|
|
347
|
+
result.iloc[0]["code_ocean"], ["df429003-91a0-45d2-8457-66b156ad8bfa"]
|
|
348
|
+
)
|
|
349
|
+
|
|
280
350
|
|
|
281
351
|
class TestSourceData(unittest.TestCase):
|
|
282
352
|
"""Tests for source_data squirrel."""
|
|
283
353
|
|
|
284
|
-
@patch("zombie_squirrel.
|
|
285
|
-
@patch("zombie_squirrel.
|
|
286
|
-
def test_source_data_cache_hit(self, mock_client_class,
|
|
354
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
355
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
356
|
+
def test_source_data_cache_hit(self, mock_client_class, mock_tree):
|
|
287
357
|
"""Test returning cached source data."""
|
|
288
358
|
cached_df = pd.DataFrame(
|
|
289
359
|
{
|
|
@@ -291,7 +361,7 @@ class TestSourceData(unittest.TestCase):
|
|
|
291
361
|
"source_data": ["source1, source2", "source3"],
|
|
292
362
|
}
|
|
293
363
|
)
|
|
294
|
-
|
|
364
|
+
mock_tree.hide(NAMES["d2r"], cached_df)
|
|
295
365
|
|
|
296
366
|
result = source_data()
|
|
297
367
|
|
|
@@ -299,9 +369,9 @@ class TestSourceData(unittest.TestCase):
|
|
|
299
369
|
self.assertEqual(result.iloc[0]["source_data"], "source1, source2")
|
|
300
370
|
mock_client_class.assert_not_called()
|
|
301
371
|
|
|
302
|
-
@patch("zombie_squirrel.
|
|
303
|
-
@patch("zombie_squirrel.
|
|
304
|
-
def test_source_data_cache_miss(self, mock_client_class,
|
|
372
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
373
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
374
|
+
def test_source_data_cache_miss(self, mock_client_class, mock_tree):
|
|
305
375
|
"""Test fetching source data when cache is empty."""
|
|
306
376
|
mock_client_instance = MagicMock()
|
|
307
377
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -319,9 +389,9 @@ class TestSourceData(unittest.TestCase):
|
|
|
319
389
|
self.assertEqual(result.iloc[0]["source_data"], "src1, src2")
|
|
320
390
|
self.assertEqual(result.iloc[1]["source_data"], "")
|
|
321
391
|
|
|
322
|
-
@patch("zombie_squirrel.
|
|
323
|
-
@patch("zombie_squirrel.
|
|
324
|
-
def test_source_data_force_update(self, mock_client_class,
|
|
392
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
393
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
394
|
+
def test_source_data_force_update(self, mock_client_class, mock_tree):
|
|
325
395
|
"""Test force_update bypasses cache."""
|
|
326
396
|
cached_df = pd.DataFrame(
|
|
327
397
|
{
|
|
@@ -329,7 +399,7 @@ class TestSourceData(unittest.TestCase):
|
|
|
329
399
|
"source_data": ["old_source"],
|
|
330
400
|
}
|
|
331
401
|
)
|
|
332
|
-
|
|
402
|
+
mock_tree.hide(NAMES["d2r"], cached_df)
|
|
333
403
|
|
|
334
404
|
mock_client_instance = MagicMock()
|
|
335
405
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -349,9 +419,9 @@ class TestSourceData(unittest.TestCase):
|
|
|
349
419
|
class TestRawToDerived(unittest.TestCase):
|
|
350
420
|
"""Tests for raw_to_derived squirrel."""
|
|
351
421
|
|
|
352
|
-
@patch("zombie_squirrel.
|
|
353
|
-
@patch("zombie_squirrel.
|
|
354
|
-
def test_raw_to_derived_cache_hit(self, mock_client_class,
|
|
422
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
423
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
424
|
+
def test_raw_to_derived_cache_hit(self, mock_client_class, mock_tree):
|
|
355
425
|
"""Test returning cached raw to derived mapping."""
|
|
356
426
|
cached_df = pd.DataFrame(
|
|
357
427
|
{
|
|
@@ -359,7 +429,7 @@ class TestRawToDerived(unittest.TestCase):
|
|
|
359
429
|
"derived_records": ["derived1, derived2", "derived3"],
|
|
360
430
|
}
|
|
361
431
|
)
|
|
362
|
-
|
|
432
|
+
mock_tree.hide(NAMES["r2d"], cached_df)
|
|
363
433
|
|
|
364
434
|
result = raw_to_derived()
|
|
365
435
|
|
|
@@ -367,9 +437,9 @@ class TestRawToDerived(unittest.TestCase):
|
|
|
367
437
|
self.assertEqual(result.iloc[0]["derived_records"], "derived1, derived2")
|
|
368
438
|
mock_client_class.assert_not_called()
|
|
369
439
|
|
|
370
|
-
@patch("zombie_squirrel.
|
|
371
|
-
@patch("zombie_squirrel.
|
|
372
|
-
def test_raw_to_derived_cache_miss(self, mock_client_class,
|
|
440
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
441
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
442
|
+
def test_raw_to_derived_cache_miss(self, mock_client_class, mock_tree):
|
|
373
443
|
"""Test fetching raw to derived mapping when cache is empty."""
|
|
374
444
|
mock_client_instance = MagicMock()
|
|
375
445
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -400,9 +470,9 @@ class TestRawToDerived(unittest.TestCase):
|
|
|
400
470
|
self.assertEqual(raw1_row.iloc[0]["derived_records"], "derived1, derived2")
|
|
401
471
|
self.assertEqual(raw2_row.iloc[0]["derived_records"], "derived2")
|
|
402
472
|
|
|
403
|
-
@patch("zombie_squirrel.
|
|
404
|
-
@patch("zombie_squirrel.
|
|
405
|
-
def test_raw_to_derived_no_derived(self, mock_client_class,
|
|
473
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
474
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
475
|
+
def test_raw_to_derived_no_derived(self, mock_client_class, mock_tree):
|
|
406
476
|
"""Test raw records with no derived data."""
|
|
407
477
|
mock_client_instance = MagicMock()
|
|
408
478
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -417,9 +487,9 @@ class TestRawToDerived(unittest.TestCase):
|
|
|
417
487
|
self.assertEqual(len(result), 1)
|
|
418
488
|
self.assertEqual(result.iloc[0]["derived_records"], "")
|
|
419
489
|
|
|
420
|
-
@patch("zombie_squirrel.
|
|
421
|
-
@patch("zombie_squirrel.
|
|
422
|
-
def test_raw_to_derived_force_update(self, mock_client_class,
|
|
490
|
+
@patch("zombie_squirrel.acorns.TREE", new_callable=MemoryTree)
|
|
491
|
+
@patch("zombie_squirrel.acorns.MetadataDbClient")
|
|
492
|
+
def test_raw_to_derived_force_update(self, mock_client_class, mock_tree):
|
|
423
493
|
"""Test force_update bypasses cache."""
|
|
424
494
|
cached_df = pd.DataFrame(
|
|
425
495
|
{
|
|
@@ -427,7 +497,7 @@ class TestRawToDerived(unittest.TestCase):
|
|
|
427
497
|
"derived_records": ["old_derived"],
|
|
428
498
|
}
|
|
429
499
|
)
|
|
430
|
-
|
|
500
|
+
mock_tree.hide(NAMES["r2d"], cached_df)
|
|
431
501
|
|
|
432
502
|
mock_client_instance = MagicMock()
|
|
433
503
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
"""Unit tests for zombie_squirrel.sync module.
|
|
2
|
+
|
|
3
|
+
Tests for cache synchronization functions."""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
from unittest.mock import MagicMock, patch
|
|
7
|
+
|
|
8
|
+
from zombie_squirrel.sync import hide_acorns
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TestHideAcorns(unittest.TestCase):
|
|
12
|
+
"""Tests for the hide_acorns function."""
|
|
13
|
+
|
|
14
|
+
@patch("zombie_squirrel.sync.ACORN_REGISTRY")
|
|
15
|
+
def test_hide_acorns_calls_all_acorns(self, mock_registry):
|
|
16
|
+
"""Test that hide_acorns calls all registered acorns with force_update."""
|
|
17
|
+
mock_acorn1 = MagicMock()
|
|
18
|
+
mock_acorn2 = MagicMock()
|
|
19
|
+
mock_acorn3 = MagicMock()
|
|
20
|
+
|
|
21
|
+
mock_registry.values.return_value = [
|
|
22
|
+
mock_acorn1,
|
|
23
|
+
mock_acorn2,
|
|
24
|
+
mock_acorn3,
|
|
25
|
+
]
|
|
26
|
+
|
|
27
|
+
hide_acorns()
|
|
28
|
+
|
|
29
|
+
mock_acorn1.assert_called_once_with(force_update=True)
|
|
30
|
+
mock_acorn2.assert_called_once_with(force_update=True)
|
|
31
|
+
mock_acorn3.assert_called_once_with(force_update=True)
|
|
32
|
+
|
|
33
|
+
@patch("zombie_squirrel.sync.ACORN_REGISTRY")
|
|
34
|
+
def test_hide_acorns_empty_registry(self, mock_registry):
|
|
35
|
+
"""Test hide_acorns with empty registry."""
|
|
36
|
+
mock_registry.values.return_value = []
|
|
37
|
+
|
|
38
|
+
# Should not raise any exception
|
|
39
|
+
hide_acorns()
|
|
40
|
+
|
|
41
|
+
mock_registry.values.assert_called_once()
|
|
42
|
+
|
|
43
|
+
@patch("zombie_squirrel.sync.ACORN_REGISTRY")
|
|
44
|
+
def test_hide_acorns_single_acorn(self, mock_registry):
|
|
45
|
+
"""Test hide_acorns with a single acorn."""
|
|
46
|
+
mock_acorn = MagicMock()
|
|
47
|
+
mock_registry.values.return_value = [mock_acorn]
|
|
48
|
+
|
|
49
|
+
hide_acorns()
|
|
50
|
+
|
|
51
|
+
mock_acorn.assert_called_once_with(force_update=True)
|
|
52
|
+
|
|
53
|
+
@patch("zombie_squirrel.sync.ACORN_REGISTRY")
|
|
54
|
+
def test_hide_acorns_acorn_order_independent(self, mock_registry):
|
|
55
|
+
"""Test that hide_acorns calls all acorns regardless of order."""
|
|
56
|
+
mock_acorns = [MagicMock() for _ in range(5)]
|
|
57
|
+
mock_registry.values.return_value = mock_acorns
|
|
58
|
+
|
|
59
|
+
hide_acorns()
|
|
60
|
+
|
|
61
|
+
# All acorns should be called with force_update=True
|
|
62
|
+
for acorn in mock_acorns:
|
|
63
|
+
acorn.assert_called_once_with(force_update=True)
|
|
64
|
+
|
|
65
|
+
@patch("zombie_squirrel.sync.ACORN_REGISTRY")
|
|
66
|
+
def test_hide_acorns_propagates_exceptions(self, mock_registry):
|
|
67
|
+
"""Test that exceptions from acorns are propagated."""
|
|
68
|
+
mock_acorn_ok = MagicMock()
|
|
69
|
+
mock_acorn_error = MagicMock(side_effect=Exception("Update failed"))
|
|
70
|
+
|
|
71
|
+
mock_registry.values.return_value = [
|
|
72
|
+
mock_acorn_ok,
|
|
73
|
+
mock_acorn_error,
|
|
74
|
+
]
|
|
75
|
+
|
|
76
|
+
with self.assertRaises(Exception) as context:
|
|
77
|
+
hide_acorns()
|
|
78
|
+
|
|
79
|
+
self.assertEqual(str(context.exception), "Update failed")
|
|
80
|
+
# First acorn should have been called
|
|
81
|
+
mock_acorn_ok.assert_called_once_with(force_update=True)
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
if __name__ == "__main__":
|
|
85
|
+
unittest.main()
|
|
@@ -1,73 +1,73 @@
|
|
|
1
|
-
"""Unit tests for zombie_squirrel.
|
|
1
|
+
"""Unit tests for zombie_squirrel.trees module.
|
|
2
2
|
|
|
3
3
|
Tests for abstract base class, memory backend, and S3 backend
|
|
4
4
|
for caching functionality."""
|
|
5
5
|
|
|
6
6
|
import unittest
|
|
7
|
-
from unittest.mock import MagicMock,
|
|
7
|
+
from unittest.mock import MagicMock, patch
|
|
8
8
|
|
|
9
9
|
import pandas as pd
|
|
10
10
|
|
|
11
|
-
from zombie_squirrel.
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
11
|
+
from zombie_squirrel.forest import (
|
|
12
|
+
Tree,
|
|
13
|
+
MemoryTree,
|
|
14
|
+
S3Tree,
|
|
15
15
|
)
|
|
16
16
|
|
|
17
17
|
|
|
18
|
-
class
|
|
19
|
-
"""Tests for
|
|
18
|
+
class TestTreeAbstractClass(unittest.TestCase):
|
|
19
|
+
"""Tests for Tree abstract base class."""
|
|
20
20
|
|
|
21
|
-
def
|
|
22
|
-
"""Test that
|
|
21
|
+
def test_tree_cannot_be_instantiated(self):
|
|
22
|
+
"""Test that Tree abstract class cannot be instantiated."""
|
|
23
23
|
with self.assertRaises(TypeError):
|
|
24
|
-
|
|
24
|
+
Tree()
|
|
25
25
|
|
|
26
|
-
def
|
|
26
|
+
def test_tree_subclass_must_implement_hide(self):
|
|
27
27
|
"""Test that subclasses must implement hide method."""
|
|
28
28
|
|
|
29
|
-
class
|
|
30
|
-
"""Incomplete
|
|
29
|
+
class IncompleteTree(Tree):
|
|
30
|
+
"""Incomplete Tree subclass missing hide method."""
|
|
31
31
|
|
|
32
32
|
def scurry(self, table_name: str) -> pd.DataFrame: # pragma: no cover
|
|
33
33
|
"""Fetch records from the cache."""
|
|
34
34
|
return pd.DataFrame()
|
|
35
35
|
|
|
36
36
|
with self.assertRaises(TypeError):
|
|
37
|
-
|
|
37
|
+
IncompleteTree()
|
|
38
38
|
|
|
39
|
-
def
|
|
39
|
+
def test_tree_subclass_must_implement_scurry(self):
|
|
40
40
|
"""Test that subclasses must implement scurry method."""
|
|
41
41
|
|
|
42
|
-
class
|
|
43
|
-
"""Incomplete
|
|
42
|
+
class IncompleteTree(Tree):
|
|
43
|
+
"""Incomplete Tree subclass missing scurry method."""
|
|
44
44
|
|
|
45
45
|
def hide(self, table_name: str, data: pd.DataFrame) -> None: # pragma: no cover
|
|
46
46
|
"""Store records in the cache."""
|
|
47
47
|
pass
|
|
48
48
|
|
|
49
49
|
with self.assertRaises(TypeError):
|
|
50
|
-
|
|
50
|
+
IncompleteTree()
|
|
51
51
|
|
|
52
52
|
|
|
53
|
-
class
|
|
54
|
-
"""Tests for
|
|
53
|
+
class TestMemoryTree(unittest.TestCase):
|
|
54
|
+
"""Tests for MemoryTree implementation."""
|
|
55
55
|
|
|
56
56
|
def setUp(self):
|
|
57
|
-
"""Initialize a fresh
|
|
58
|
-
self.
|
|
57
|
+
"""Initialize a fresh MemoryTree for each test."""
|
|
58
|
+
self.tree = MemoryTree()
|
|
59
59
|
|
|
60
60
|
def test_hide_and_scurry_basic(self):
|
|
61
61
|
"""Test basic hide and scurry operations."""
|
|
62
62
|
df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
|
|
63
|
-
self.
|
|
63
|
+
self.tree.hide("test_table", df)
|
|
64
64
|
|
|
65
|
-
retrieved = self.
|
|
65
|
+
retrieved = self.tree.scurry("test_table")
|
|
66
66
|
pd.testing.assert_frame_equal(df, retrieved)
|
|
67
67
|
|
|
68
68
|
def test_scurry_empty_table(self):
|
|
69
69
|
"""Test scurrying a table that doesn't exist returns empty DataFrame."""
|
|
70
|
-
result = self.
|
|
70
|
+
result = self.tree.scurry("nonexistent_table")
|
|
71
71
|
self.assertTrue(result.empty)
|
|
72
72
|
self.assertIsInstance(result, pd.DataFrame)
|
|
73
73
|
|
|
@@ -76,10 +76,10 @@ class TestMemoryAcorn(unittest.TestCase):
|
|
|
76
76
|
df1 = pd.DataFrame({"col1": [1, 2, 3]})
|
|
77
77
|
df2 = pd.DataFrame({"col1": [4, 5, 6]})
|
|
78
78
|
|
|
79
|
-
self.
|
|
80
|
-
self.
|
|
79
|
+
self.tree.hide("table", df1)
|
|
80
|
+
self.tree.hide("table", df2)
|
|
81
81
|
|
|
82
|
-
retrieved = self.
|
|
82
|
+
retrieved = self.tree.scurry("table")
|
|
83
83
|
pd.testing.assert_frame_equal(df2, retrieved)
|
|
84
84
|
|
|
85
85
|
def test_multiple_tables(self):
|
|
@@ -87,11 +87,11 @@ class TestMemoryAcorn(unittest.TestCase):
|
|
|
87
87
|
df1 = pd.DataFrame({"col1": [1, 2]})
|
|
88
88
|
df2 = pd.DataFrame({"col2": ["a", "b"]})
|
|
89
89
|
|
|
90
|
-
self.
|
|
91
|
-
self.
|
|
90
|
+
self.tree.hide("table1", df1)
|
|
91
|
+
self.tree.hide("table2", df2)
|
|
92
92
|
|
|
93
|
-
retrieved1 = self.
|
|
94
|
-
retrieved2 = self.
|
|
93
|
+
retrieved1 = self.tree.scurry("table1")
|
|
94
|
+
retrieved2 = self.tree.scurry("table2")
|
|
95
95
|
|
|
96
96
|
pd.testing.assert_frame_equal(df1, retrieved1)
|
|
97
97
|
pd.testing.assert_frame_equal(df2, retrieved2)
|
|
@@ -99,34 +99,34 @@ class TestMemoryAcorn(unittest.TestCase):
|
|
|
99
99
|
def test_hide_empty_dataframe(self):
|
|
100
100
|
"""Test hiding an empty DataFrame."""
|
|
101
101
|
df = pd.DataFrame()
|
|
102
|
-
self.
|
|
102
|
+
self.tree.hide("empty_table", df)
|
|
103
103
|
|
|
104
|
-
retrieved = self.
|
|
104
|
+
retrieved = self.tree.scurry("empty_table")
|
|
105
105
|
pd.testing.assert_frame_equal(df, retrieved)
|
|
106
106
|
|
|
107
107
|
|
|
108
|
-
class
|
|
109
|
-
"""Tests for
|
|
108
|
+
class TestS3Tree(unittest.TestCase):
|
|
109
|
+
"""Tests for S3Tree implementation with mocking."""
|
|
110
110
|
|
|
111
|
-
@patch("zombie_squirrel.
|
|
111
|
+
@patch("zombie_squirrel.forest.boto3.client")
|
|
112
112
|
def test_s3_acorn_initialization(self, mock_boto3_client):
|
|
113
|
-
"""Test
|
|
113
|
+
"""Test S3Tree initialization."""
|
|
114
114
|
mock_s3_client = MagicMock()
|
|
115
115
|
mock_boto3_client.return_value = mock_s3_client
|
|
116
116
|
|
|
117
|
-
acorn =
|
|
117
|
+
acorn = S3Tree()
|
|
118
118
|
|
|
119
119
|
self.assertEqual(acorn.bucket, "aind-scratch-data")
|
|
120
120
|
self.assertEqual(acorn.s3_client, mock_s3_client)
|
|
121
121
|
mock_boto3_client.assert_called_once_with("s3")
|
|
122
122
|
|
|
123
|
-
@patch("zombie_squirrel.
|
|
123
|
+
@patch("zombie_squirrel.forest.boto3.client")
|
|
124
124
|
def test_s3_hide(self, mock_boto3_client):
|
|
125
|
-
"""Test
|
|
125
|
+
"""Test S3Tree.hide method writes to S3."""
|
|
126
126
|
mock_s3_client = MagicMock()
|
|
127
127
|
mock_boto3_client.return_value = mock_s3_client
|
|
128
128
|
|
|
129
|
-
acorn =
|
|
129
|
+
acorn = S3Tree()
|
|
130
130
|
df = pd.DataFrame({"col1": [1, 2, 3]})
|
|
131
131
|
|
|
132
132
|
acorn.hide("test_table", df)
|
|
@@ -139,10 +139,10 @@ class TestS3Acorn(unittest.TestCase):
|
|
|
139
139
|
)
|
|
140
140
|
self.assertIsInstance(call_kwargs["Body"], bytes)
|
|
141
141
|
|
|
142
|
-
@patch("zombie_squirrel.
|
|
143
|
-
@patch("zombie_squirrel.
|
|
142
|
+
@patch("zombie_squirrel.forest.duckdb.query")
|
|
143
|
+
@patch("zombie_squirrel.forest.boto3.client")
|
|
144
144
|
def test_s3_scurry(self, mock_boto3_client, mock_duckdb_query):
|
|
145
|
-
"""Test
|
|
145
|
+
"""Test S3Tree.scurry method reads from S3 using DuckDB."""
|
|
146
146
|
mock_s3_client = MagicMock()
|
|
147
147
|
mock_boto3_client.return_value = mock_s3_client
|
|
148
148
|
|
|
@@ -151,7 +151,7 @@ class TestS3Acorn(unittest.TestCase):
|
|
|
151
151
|
mock_result.to_df.return_value = expected_df
|
|
152
152
|
mock_duckdb_query.return_value = mock_result
|
|
153
153
|
|
|
154
|
-
acorn =
|
|
154
|
+
acorn = S3Tree()
|
|
155
155
|
result = acorn.scurry("test_table")
|
|
156
156
|
|
|
157
157
|
# Verify DuckDB was called with correct S3 path
|
|
@@ -163,17 +163,17 @@ class TestS3Acorn(unittest.TestCase):
|
|
|
163
163
|
)
|
|
164
164
|
pd.testing.assert_frame_equal(result, expected_df)
|
|
165
165
|
|
|
166
|
-
@patch("zombie_squirrel.
|
|
167
|
-
@patch("zombie_squirrel.
|
|
166
|
+
@patch("zombie_squirrel.forest.duckdb.query")
|
|
167
|
+
@patch("zombie_squirrel.forest.boto3.client")
|
|
168
168
|
def test_s3_scurry_handles_error(
|
|
169
169
|
self, mock_boto3_client, mock_duckdb_query
|
|
170
170
|
):
|
|
171
|
-
"""Test
|
|
171
|
+
"""Test S3Tree.scurry returns empty DataFrame on error."""
|
|
172
172
|
mock_s3_client = MagicMock()
|
|
173
173
|
mock_boto3_client.return_value = mock_s3_client
|
|
174
174
|
mock_duckdb_query.side_effect = Exception("S3 access error")
|
|
175
175
|
|
|
176
|
-
acorn =
|
|
176
|
+
acorn = S3Tree()
|
|
177
177
|
result = acorn.scurry("nonexistent_table")
|
|
178
178
|
|
|
179
179
|
self.assertTrue(result.empty)
|
|
@@ -1,18 +0,0 @@
|
|
|
1
|
-
"""Synchronization utilities for updating all cached data."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
|
|
5
|
-
from .squirrels import SQUIRREL_REGISTRY
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
def hide_acorns():
|
|
9
|
-
"""Trigger force update of all registered squirrel functions.
|
|
10
|
-
|
|
11
|
-
Calls each squirrel function with force_update=True to refresh
|
|
12
|
-
all cached data in the acorn backend."""
|
|
13
|
-
logging.basicConfig(
|
|
14
|
-
level=logging.INFO,
|
|
15
|
-
format="%(asctime)s %(levelname)s %(message)s"
|
|
16
|
-
)
|
|
17
|
-
for squirrel in SQUIRREL_REGISTRY.values():
|
|
18
|
-
squirrel(force_update=True)
|
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
"""Unit tests for zombie_squirrel.sync module.
|
|
2
|
-
|
|
3
|
-
Tests for cache synchronization functions."""
|
|
4
|
-
|
|
5
|
-
import unittest
|
|
6
|
-
from unittest.mock import MagicMock, patch
|
|
7
|
-
|
|
8
|
-
from zombie_squirrel.sync import hide_acorns
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class TestHideAcorns(unittest.TestCase):
|
|
12
|
-
"""Tests for the hide_acorns function."""
|
|
13
|
-
|
|
14
|
-
@patch("zombie_squirrel.sync.SQUIRREL_REGISTRY")
|
|
15
|
-
def test_hide_acorns_calls_all_squirrels(self, mock_registry):
|
|
16
|
-
"""Test that hide_acorns calls all registered squirrels with force_update."""
|
|
17
|
-
mock_squirrel1 = MagicMock()
|
|
18
|
-
mock_squirrel2 = MagicMock()
|
|
19
|
-
mock_squirrel3 = MagicMock()
|
|
20
|
-
|
|
21
|
-
mock_registry.values.return_value = [
|
|
22
|
-
mock_squirrel1,
|
|
23
|
-
mock_squirrel2,
|
|
24
|
-
mock_squirrel3,
|
|
25
|
-
]
|
|
26
|
-
|
|
27
|
-
hide_acorns()
|
|
28
|
-
|
|
29
|
-
mock_squirrel1.assert_called_once_with(force_update=True)
|
|
30
|
-
mock_squirrel2.assert_called_once_with(force_update=True)
|
|
31
|
-
mock_squirrel3.assert_called_once_with(force_update=True)
|
|
32
|
-
|
|
33
|
-
@patch("zombie_squirrel.sync.SQUIRREL_REGISTRY")
|
|
34
|
-
def test_hide_acorns_empty_registry(self, mock_registry):
|
|
35
|
-
"""Test hide_acorns with empty registry."""
|
|
36
|
-
mock_registry.values.return_value = []
|
|
37
|
-
|
|
38
|
-
# Should not raise any exception
|
|
39
|
-
hide_acorns()
|
|
40
|
-
|
|
41
|
-
mock_registry.values.assert_called_once()
|
|
42
|
-
|
|
43
|
-
@patch("zombie_squirrel.sync.SQUIRREL_REGISTRY")
|
|
44
|
-
def test_hide_acorns_single_squirrel(self, mock_registry):
|
|
45
|
-
"""Test hide_acorns with a single squirrel."""
|
|
46
|
-
mock_squirrel = MagicMock()
|
|
47
|
-
mock_registry.values.return_value = [mock_squirrel]
|
|
48
|
-
|
|
49
|
-
hide_acorns()
|
|
50
|
-
|
|
51
|
-
mock_squirrel.assert_called_once_with(force_update=True)
|
|
52
|
-
|
|
53
|
-
@patch("zombie_squirrel.sync.SQUIRREL_REGISTRY")
|
|
54
|
-
def test_hide_acorns_squirrel_order_independent(self, mock_registry):
|
|
55
|
-
"""Test that hide_acorns calls all squirrels regardless of order."""
|
|
56
|
-
mock_squirrels = [MagicMock() for _ in range(5)]
|
|
57
|
-
mock_registry.values.return_value = mock_squirrels
|
|
58
|
-
|
|
59
|
-
hide_acorns()
|
|
60
|
-
|
|
61
|
-
# All squirrels should be called with force_update=True
|
|
62
|
-
for squirrel in mock_squirrels:
|
|
63
|
-
squirrel.assert_called_once_with(force_update=True)
|
|
64
|
-
|
|
65
|
-
@patch("zombie_squirrel.sync.SQUIRREL_REGISTRY")
|
|
66
|
-
def test_hide_acorns_propagates_exceptions(self, mock_registry):
|
|
67
|
-
"""Test that exceptions from squirrels are propagated."""
|
|
68
|
-
mock_squirrel_ok = MagicMock()
|
|
69
|
-
mock_squirrel_error = MagicMock(side_effect=Exception("Update failed"))
|
|
70
|
-
|
|
71
|
-
mock_registry.values.return_value = [
|
|
72
|
-
mock_squirrel_ok,
|
|
73
|
-
mock_squirrel_error,
|
|
74
|
-
]
|
|
75
|
-
|
|
76
|
-
with self.assertRaises(Exception) as context:
|
|
77
|
-
hide_acorns()
|
|
78
|
-
|
|
79
|
-
self.assertEqual(str(context.exception), "Update failed")
|
|
80
|
-
# First squirrel should have been called
|
|
81
|
-
mock_squirrel_ok.assert_called_once_with(force_update=True)
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
if __name__ == "__main__":
|
|
85
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{zombie_squirrel-0.7.3 → zombie_squirrel-0.8.0}/src/zombie_squirrel.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|