zombie-squirrel 0.5.0__tar.gz → 0.5.2__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {zombie_squirrel-0.5.0/src/zombie_squirrel.egg-info → zombie_squirrel-0.5.2}/PKG-INFO +7 -3
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/README.md +1 -1
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/pyproject.toml +5 -1
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel/__init__.py +1 -1
- zombie_squirrel-0.5.2/src/zombie_squirrel/acorns.py +96 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel/squirrels.py +13 -12
- zombie_squirrel-0.5.2/src/zombie_squirrel/utils.py +23 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2/src/zombie_squirrel.egg-info}/PKG-INFO +7 -3
- zombie_squirrel-0.5.2/src/zombie_squirrel.egg-info/requires.txt +5 -0
- zombie_squirrel-0.5.2/tests/test_acorns.py +184 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/tests/test_squirrels.py +41 -1
- zombie_squirrel-0.5.2/tests/test_utils.py +54 -0
- zombie_squirrel-0.5.0/src/zombie_squirrel/acorns.py +0 -81
- zombie_squirrel-0.5.0/src/zombie_squirrel/utils.py +0 -12
- zombie_squirrel-0.5.0/src/zombie_squirrel.egg-info/requires.txt +0 -1
- zombie_squirrel-0.5.0/tests/test_acorns.py +0 -217
- zombie_squirrel-0.5.0/tests/test_utils.py +0 -40
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/LICENSE +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/setup.cfg +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/setup.py +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel/sync.py +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel.egg-info/SOURCES.txt +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel.egg-info/dependency_links.txt +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel.egg-info/top_level.txt +0 -0
- {zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/tests/test_sync.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zombie-squirrel
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: Generated from aind-library-template
|
|
5
5
|
Author: Allen Institute for Neural Dynamics
|
|
6
6
|
License: MIT
|
|
@@ -8,7 +8,11 @@ Classifier: Programming Language :: Python :: 3
|
|
|
8
8
|
Requires-Python: >=3.10
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
|
11
|
-
Requires-Dist: aind-data-access-api[docdb,rds]
|
|
11
|
+
Requires-Dist: duckdb
|
|
12
|
+
Requires-Dist: fastparquet
|
|
13
|
+
Requires-Dist: boto3
|
|
14
|
+
Requires-Dist: pandas
|
|
15
|
+
Requires-Dist: aind-data-access-api[docdb]
|
|
12
16
|
Dynamic: license-file
|
|
13
17
|
|
|
14
18
|
# zombie-squirrel
|
|
@@ -17,7 +21,7 @@ Dynamic: license-file
|
|
|
17
21
|

|
|
18
22
|
[](https://github.com/semantic-release/semantic-release)
|
|
19
23
|

|
|
20
|
-

|
|
21
25
|

|
|
22
26
|
|
|
23
27
|
<img src="zombie-squirrel_logo.png" width="400" alt="Logo (image from ChatGPT)">
|
|
@@ -4,7 +4,7 @@
|
|
|
4
4
|

|
|
5
5
|
[](https://github.com/semantic-release/semantic-release)
|
|
6
6
|

|
|
7
|
-

|
|
8
8
|

|
|
9
9
|
|
|
10
10
|
<img src="zombie-squirrel_logo.png" width="400" alt="Logo (image from ChatGPT)">
|
|
@@ -3,7 +3,7 @@
|
|
|
3
3
|
Provides functions to fetch and cache project names, subject IDs, and asset
|
|
4
4
|
metadata from the AIND metadata database with support for multiple backends."""
|
|
5
5
|
|
|
6
|
-
__version__ = "0.5.0"
|
|
6
|
+
__version__ = "0.5.2"
|
|
7
7
|
|
|
8
8
|
from zombie_squirrel.squirrels import ( # noqa: F401
|
|
9
9
|
asset_basics,
|
|
@@ -0,0 +1,96 @@
|
|
|
1
|
+
"""Storage backend interfaces for caching data."""
|
|
2
|
+
|
|
3
|
+
import io
|
|
4
|
+
import logging
|
|
5
|
+
from abc import ABC, abstractmethod
|
|
6
|
+
|
|
7
|
+
import boto3
|
|
8
|
+
import duckdb
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from zombie_squirrel.utils import get_s3_cache_path, prefix_table_name
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
class Acorn(ABC):
|
|
15
|
+
"""Base class for a storage backend (the cache)."""
|
|
16
|
+
|
|
17
|
+
def __init__(self) -> None:
|
|
18
|
+
"""Initialize the Acorn."""
|
|
19
|
+
super().__init__()
|
|
20
|
+
|
|
21
|
+
@abstractmethod
|
|
22
|
+
def hide(self, table_name: str, data: pd.DataFrame) -> None:
|
|
23
|
+
"""Store records in the cache."""
|
|
24
|
+
pass # pragma: no cover
|
|
25
|
+
|
|
26
|
+
@abstractmethod
|
|
27
|
+
def scurry(self, table_name: str) -> pd.DataFrame:
|
|
28
|
+
"""Fetch records from the cache."""
|
|
29
|
+
pass # pragma: no cover
|
|
30
|
+
|
|
31
|
+
|
|
32
|
+
class S3Acorn(Acorn):
|
|
33
|
+
"""Stores and retrieves caches using AWS S3 with parquet files."""
|
|
34
|
+
|
|
35
|
+
def __init__(self) -> None:
|
|
36
|
+
"""Initialize S3Acorn with S3 client."""
|
|
37
|
+
self.bucket = "aind-scratch-data"
|
|
38
|
+
self.s3_client = boto3.client("s3")
|
|
39
|
+
|
|
40
|
+
def hide(self, table_name: str, data: pd.DataFrame) -> None:
|
|
41
|
+
"""Store DataFrame as parquet file in S3."""
|
|
42
|
+
filename = prefix_table_name(table_name)
|
|
43
|
+
s3_key = get_s3_cache_path(filename)
|
|
44
|
+
|
|
45
|
+
# Convert DataFrame to parquet bytes
|
|
46
|
+
parquet_buffer = io.BytesIO()
|
|
47
|
+
data.to_parquet(parquet_buffer, index=False)
|
|
48
|
+
parquet_buffer.seek(0)
|
|
49
|
+
|
|
50
|
+
# Upload to S3
|
|
51
|
+
self.s3_client.put_object(
|
|
52
|
+
Bucket=self.bucket,
|
|
53
|
+
Key=s3_key,
|
|
54
|
+
Body=parquet_buffer.getvalue(),
|
|
55
|
+
)
|
|
56
|
+
logging.info(f"Stored cache to S3: s3://{self.bucket}/{s3_key}")
|
|
57
|
+
|
|
58
|
+
def scurry(self, table_name: str) -> pd.DataFrame:
|
|
59
|
+
"""Fetch DataFrame from S3 parquet file."""
|
|
60
|
+
filename = prefix_table_name(table_name)
|
|
61
|
+
s3_key = get_s3_cache_path(filename)
|
|
62
|
+
|
|
63
|
+
try:
|
|
64
|
+
# Read directly from S3 using DuckDB
|
|
65
|
+
query = f"""
|
|
66
|
+
SELECT * FROM read_parquet(
|
|
67
|
+
's3://{self.bucket}/{s3_key}'
|
|
68
|
+
)
|
|
69
|
+
"""
|
|
70
|
+
result = duckdb.query(query).to_df()
|
|
71
|
+
logging.info(
|
|
72
|
+
f"Retrieved cache from S3: s3://{self.bucket}/{s3_key}"
|
|
73
|
+
)
|
|
74
|
+
return result
|
|
75
|
+
except Exception as e:
|
|
76
|
+
logging.warning(
|
|
77
|
+
f"Error fetching from cache {s3_key}: {e}"
|
|
78
|
+
)
|
|
79
|
+
return pd.DataFrame()
|
|
80
|
+
|
|
81
|
+
|
|
82
|
+
class MemoryAcorn(Acorn):
|
|
83
|
+
"""A simple in-memory backend for testing or local development."""
|
|
84
|
+
|
|
85
|
+
def __init__(self) -> None:
|
|
86
|
+
"""Initialize MemoryAcorn with empty store."""
|
|
87
|
+
super().__init__()
|
|
88
|
+
self._store: dict[str, pd.DataFrame] = {}
|
|
89
|
+
|
|
90
|
+
def hide(self, table_name: str, data: pd.DataFrame) -> None:
|
|
91
|
+
"""Store DataFrame in memory."""
|
|
92
|
+
self._store[table_name] = data
|
|
93
|
+
|
|
94
|
+
def scurry(self, table_name: str) -> pd.DataFrame:
|
|
95
|
+
"""Fetch DataFrame from memory."""
|
|
96
|
+
return self._store.get(table_name, pd.DataFrame())
|
|
@@ -10,8 +10,7 @@ from aind_data_access_api.document_db import MetadataDbClient
|
|
|
10
10
|
|
|
11
11
|
from zombie_squirrel.acorns import (
|
|
12
12
|
MemoryAcorn,
|
|
13
|
-
|
|
14
|
-
rds_get_handle_empty,
|
|
13
|
+
S3Acorn,
|
|
15
14
|
)
|
|
16
15
|
|
|
17
16
|
# --- Backend setup ---------------------------------------------------
|
|
@@ -20,9 +19,9 @@ API_GATEWAY_HOST = "api.allenneuraldynamics.org"
|
|
|
20
19
|
|
|
21
20
|
tree_type = os.getenv("TREE_SPECIES", "memory").lower()
|
|
22
21
|
|
|
23
|
-
if tree_type == "
|
|
24
|
-
logging.info("Using
|
|
25
|
-
ACORN = RedshiftAcorn()
|
|
22
|
+
if tree_type == "s3": # pragma: no cover
|
|
23
|
+
logging.info("Using S3 acorn for caching")
|
|
24
|
+
ACORN = S3Acorn()
|
|
26
25
|
else:
|
|
27
26
|
logging.info("Using in-memory acorn for caching")
|
|
28
27
|
ACORN = MemoryAcorn()
|
|
@@ -66,7 +65,7 @@ def unique_project_names(force_update: bool = False) -> list[str]:
|
|
|
66
65
|
|
|
67
66
|
Returns:
|
|
68
67
|
List of unique project names."""
|
|
69
|
-
df = rds_get_handle_empty(ACORN, NAMES["upn"])
|
|
68
|
+
df = ACORN.scurry(NAMES["upn"])
|
|
70
69
|
|
|
71
70
|
if df.empty or force_update:
|
|
72
71
|
# If cache is missing, fetch data
|
|
@@ -99,7 +98,7 @@ def unique_subject_ids(force_update: bool = False) -> list[str]:
|
|
|
99
98
|
|
|
100
99
|
Returns:
|
|
101
100
|
List of unique subject IDs."""
|
|
102
|
-
df = rds_get_handle_empty(ACORN, NAMES["usi"])
|
|
101
|
+
df = ACORN.scurry(NAMES["usi"])
|
|
103
102
|
|
|
104
103
|
if df.empty or force_update:
|
|
105
104
|
# If cache is missing, fetch data
|
|
@@ -134,7 +133,7 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
134
133
|
|
|
135
134
|
Returns:
|
|
136
135
|
DataFrame with basic asset metadata."""
|
|
137
|
-
df = rds_get_handle_empty(ACORN, NAMES["basics"])
|
|
136
|
+
df = ACORN.scurry(NAMES["basics"])
|
|
138
137
|
|
|
139
138
|
FIELDS = [
|
|
140
139
|
"data_description.modalities",
|
|
@@ -205,8 +204,10 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
205
204
|
modality_abbreviations_str = ", ".join(modality_abbreviations)
|
|
206
205
|
|
|
207
206
|
# Get the process date, convert to YYYY-MM-DD if present
|
|
208
|
-
|
|
209
|
-
if
|
|
207
|
+
data_processes = record.get("processing", {}).get("data_processes", [])
|
|
208
|
+
if data_processes:
|
|
209
|
+
latest_process = data_processes[-1]
|
|
210
|
+
process_datetime = latest_process.get("start_date_time", None)
|
|
210
211
|
process_date = process_datetime.split("T")[0]
|
|
211
212
|
else:
|
|
212
213
|
process_date = None
|
|
@@ -247,7 +248,7 @@ def source_data(force_update: bool = False) -> pd.DataFrame:
|
|
|
247
248
|
|
|
248
249
|
Returns:
|
|
249
250
|
DataFrame with _id and source_data columns."""
|
|
250
|
-
df = rds_get_handle_empty(ACORN, NAMES["d2r"])
|
|
251
|
+
df = ACORN.scurry(NAMES["d2r"])
|
|
251
252
|
|
|
252
253
|
if df.empty or force_update:
|
|
253
254
|
logging.info("Updating cache for source data")
|
|
@@ -289,7 +290,7 @@ def raw_to_derived(force_update: bool = False) -> pd.DataFrame:
|
|
|
289
290
|
|
|
290
291
|
Returns:
|
|
291
292
|
DataFrame with _id and derived_records columns."""
|
|
292
|
-
df = rds_get_handle_empty(ACORN, NAMES["r2d"])
|
|
293
|
+
df = ACORN.scurry(NAMES["r2d"])
|
|
293
294
|
|
|
294
295
|
if df.empty or force_update:
|
|
295
296
|
logging.info("Updating cache for raw to derived mapping")
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
"""Utility functions for zombie-squirrel package."""
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
def prefix_table_name(table_name: str) -> str:
|
|
5
|
+
"""Add zombie-squirrel prefix and parquet extension to filenames.
|
|
6
|
+
|
|
7
|
+
Args:
|
|
8
|
+
table_name: The base table name.
|
|
9
|
+
|
|
10
|
+
Returns:
|
|
11
|
+
Filename with 'zs_' prefix and '.pqt' extension."""
|
|
12
|
+
return "zs_" + table_name + ".pqt"
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def get_s3_cache_path(filename: str) -> str:
|
|
16
|
+
"""Get the full S3 path for a cache file.
|
|
17
|
+
|
|
18
|
+
Args:
|
|
19
|
+
filename: The cache filename (e.g., "zs_unique_project_names.pqt").
|
|
20
|
+
|
|
21
|
+
Returns:
|
|
22
|
+
Full S3 path: application-caches/filename"""
|
|
23
|
+
return f"application-caches/{filename}"
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: zombie-squirrel
|
|
3
|
-
Version: 0.5.0
|
|
3
|
+
Version: 0.5.2
|
|
4
4
|
Summary: Generated from aind-library-template
|
|
5
5
|
Author: Allen Institute for Neural Dynamics
|
|
6
6
|
License: MIT
|
|
@@ -8,7 +8,11 @@ Classifier: Programming Language :: Python :: 3
|
|
|
8
8
|
Requires-Python: >=3.10
|
|
9
9
|
Description-Content-Type: text/markdown
|
|
10
10
|
License-File: LICENSE
|
|
11
|
-
Requires-Dist: aind-data-access-api[docdb,rds]
|
|
11
|
+
Requires-Dist: duckdb
|
|
12
|
+
Requires-Dist: fastparquet
|
|
13
|
+
Requires-Dist: boto3
|
|
14
|
+
Requires-Dist: pandas
|
|
15
|
+
Requires-Dist: aind-data-access-api[docdb]
|
|
12
16
|
Dynamic: license-file
|
|
13
17
|
|
|
14
18
|
# zombie-squirrel
|
|
@@ -17,7 +21,7 @@ Dynamic: license-file
|
|
|
17
21
|

|
|
18
22
|
[](https://github.com/semantic-release/semantic-release)
|
|
19
23
|

|
|
20
|
-

|
|
21
25
|

|
|
22
26
|
|
|
23
27
|
<img src="zombie-squirrel_logo.png" width="400" alt="Logo (image from ChatGPT)">
|
|
@@ -0,0 +1,184 @@
|
|
|
1
|
+
"""Unit tests for zombie_squirrel.acorns module.
|
|
2
|
+
|
|
3
|
+
Tests for abstract base class, memory backend, and S3 backend
|
|
4
|
+
for caching functionality."""
|
|
5
|
+
|
|
6
|
+
import unittest
|
|
7
|
+
from unittest.mock import MagicMock, Mock, patch
|
|
8
|
+
|
|
9
|
+
import pandas as pd
|
|
10
|
+
|
|
11
|
+
from zombie_squirrel.acorns import (
|
|
12
|
+
Acorn,
|
|
13
|
+
MemoryAcorn,
|
|
14
|
+
S3Acorn,
|
|
15
|
+
)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
class TestAcornAbstractClass(unittest.TestCase):
|
|
19
|
+
"""Tests for Acorn abstract base class."""
|
|
20
|
+
|
|
21
|
+
def test_acorn_cannot_be_instantiated(self):
|
|
22
|
+
"""Test that Acorn abstract class cannot be instantiated."""
|
|
23
|
+
with self.assertRaises(TypeError):
|
|
24
|
+
Acorn()
|
|
25
|
+
|
|
26
|
+
def test_acorn_subclass_must_implement_hide(self):
|
|
27
|
+
"""Test that subclasses must implement hide method."""
|
|
28
|
+
|
|
29
|
+
class IncompleteAcorn(Acorn):
|
|
30
|
+
"""Incomplete Acorn subclass missing hide method."""
|
|
31
|
+
|
|
32
|
+
def scurry(self, table_name: str) -> pd.DataFrame: # pragma: no cover
|
|
33
|
+
"""Fetch records from the cache."""
|
|
34
|
+
return pd.DataFrame()
|
|
35
|
+
|
|
36
|
+
with self.assertRaises(TypeError):
|
|
37
|
+
IncompleteAcorn()
|
|
38
|
+
|
|
39
|
+
def test_acorn_subclass_must_implement_scurry(self):
|
|
40
|
+
"""Test that subclasses must implement scurry method."""
|
|
41
|
+
|
|
42
|
+
class IncompleteAcorn(Acorn):
|
|
43
|
+
"""Incomplete Acorn subclass missing scurry method."""
|
|
44
|
+
|
|
45
|
+
def hide(self, table_name: str, data: pd.DataFrame) -> None: # pragma: no cover
|
|
46
|
+
"""Store records in the cache."""
|
|
47
|
+
pass
|
|
48
|
+
|
|
49
|
+
with self.assertRaises(TypeError):
|
|
50
|
+
IncompleteAcorn()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
class TestMemoryAcorn(unittest.TestCase):
|
|
54
|
+
"""Tests for MemoryAcorn implementation."""
|
|
55
|
+
|
|
56
|
+
def setUp(self):
|
|
57
|
+
"""Initialize a fresh MemoryAcorn for each test."""
|
|
58
|
+
self.acorn = MemoryAcorn()
|
|
59
|
+
|
|
60
|
+
def test_hide_and_scurry_basic(self):
|
|
61
|
+
"""Test basic hide and scurry operations."""
|
|
62
|
+
df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
|
|
63
|
+
self.acorn.hide("test_table", df)
|
|
64
|
+
|
|
65
|
+
retrieved = self.acorn.scurry("test_table")
|
|
66
|
+
pd.testing.assert_frame_equal(df, retrieved)
|
|
67
|
+
|
|
68
|
+
def test_scurry_empty_table(self):
|
|
69
|
+
"""Test scurrying a table that doesn't exist returns empty DataFrame."""
|
|
70
|
+
result = self.acorn.scurry("nonexistent_table")
|
|
71
|
+
self.assertTrue(result.empty)
|
|
72
|
+
self.assertIsInstance(result, pd.DataFrame)
|
|
73
|
+
|
|
74
|
+
def test_hide_overwrites_existing(self):
|
|
75
|
+
"""Test that hiding data overwrites existing data."""
|
|
76
|
+
df1 = pd.DataFrame({"col1": [1, 2, 3]})
|
|
77
|
+
df2 = pd.DataFrame({"col1": [4, 5, 6]})
|
|
78
|
+
|
|
79
|
+
self.acorn.hide("table", df1)
|
|
80
|
+
self.acorn.hide("table", df2)
|
|
81
|
+
|
|
82
|
+
retrieved = self.acorn.scurry("table")
|
|
83
|
+
pd.testing.assert_frame_equal(df2, retrieved)
|
|
84
|
+
|
|
85
|
+
def test_multiple_tables(self):
|
|
86
|
+
"""Test managing multiple tables."""
|
|
87
|
+
df1 = pd.DataFrame({"col1": [1, 2]})
|
|
88
|
+
df2 = pd.DataFrame({"col2": ["a", "b"]})
|
|
89
|
+
|
|
90
|
+
self.acorn.hide("table1", df1)
|
|
91
|
+
self.acorn.hide("table2", df2)
|
|
92
|
+
|
|
93
|
+
retrieved1 = self.acorn.scurry("table1")
|
|
94
|
+
retrieved2 = self.acorn.scurry("table2")
|
|
95
|
+
|
|
96
|
+
pd.testing.assert_frame_equal(df1, retrieved1)
|
|
97
|
+
pd.testing.assert_frame_equal(df2, retrieved2)
|
|
98
|
+
|
|
99
|
+
def test_hide_empty_dataframe(self):
|
|
100
|
+
"""Test hiding an empty DataFrame."""
|
|
101
|
+
df = pd.DataFrame()
|
|
102
|
+
self.acorn.hide("empty_table", df)
|
|
103
|
+
|
|
104
|
+
retrieved = self.acorn.scurry("empty_table")
|
|
105
|
+
pd.testing.assert_frame_equal(df, retrieved)
|
|
106
|
+
|
|
107
|
+
|
|
108
|
+
class TestS3Acorn(unittest.TestCase):
|
|
109
|
+
"""Tests for S3Acorn implementation with mocking."""
|
|
110
|
+
|
|
111
|
+
@patch("zombie_squirrel.acorns.boto3.client")
|
|
112
|
+
def test_s3_acorn_initialization(self, mock_boto3_client):
|
|
113
|
+
"""Test S3Acorn initialization."""
|
|
114
|
+
mock_s3_client = MagicMock()
|
|
115
|
+
mock_boto3_client.return_value = mock_s3_client
|
|
116
|
+
|
|
117
|
+
acorn = S3Acorn()
|
|
118
|
+
|
|
119
|
+
self.assertEqual(acorn.bucket, "aind-scratch-data")
|
|
120
|
+
self.assertEqual(acorn.s3_client, mock_s3_client)
|
|
121
|
+
mock_boto3_client.assert_called_once_with("s3")
|
|
122
|
+
|
|
123
|
+
@patch("zombie_squirrel.acorns.boto3.client")
|
|
124
|
+
def test_s3_hide(self, mock_boto3_client):
|
|
125
|
+
"""Test S3Acorn.hide method writes to S3."""
|
|
126
|
+
mock_s3_client = MagicMock()
|
|
127
|
+
mock_boto3_client.return_value = mock_s3_client
|
|
128
|
+
|
|
129
|
+
acorn = S3Acorn()
|
|
130
|
+
df = pd.DataFrame({"col1": [1, 2, 3]})
|
|
131
|
+
|
|
132
|
+
acorn.hide("test_table", df)
|
|
133
|
+
|
|
134
|
+
mock_s3_client.put_object.assert_called_once()
|
|
135
|
+
call_kwargs = mock_s3_client.put_object.call_args[1]
|
|
136
|
+
self.assertEqual(call_kwargs["Bucket"], "aind-scratch-data")
|
|
137
|
+
self.assertEqual(
|
|
138
|
+
call_kwargs["Key"], "application-caches/zs_test_table.pqt"
|
|
139
|
+
)
|
|
140
|
+
self.assertIsInstance(call_kwargs["Body"], bytes)
|
|
141
|
+
|
|
142
|
+
@patch("zombie_squirrel.acorns.duckdb.query")
|
|
143
|
+
@patch("zombie_squirrel.acorns.boto3.client")
|
|
144
|
+
def test_s3_scurry(self, mock_boto3_client, mock_duckdb_query):
|
|
145
|
+
"""Test S3Acorn.scurry method reads from S3 using DuckDB."""
|
|
146
|
+
mock_s3_client = MagicMock()
|
|
147
|
+
mock_boto3_client.return_value = mock_s3_client
|
|
148
|
+
|
|
149
|
+
expected_df = pd.DataFrame({"col1": [1, 2, 3]})
|
|
150
|
+
mock_result = MagicMock()
|
|
151
|
+
mock_result.to_df.return_value = expected_df
|
|
152
|
+
mock_duckdb_query.return_value = mock_result
|
|
153
|
+
|
|
154
|
+
acorn = S3Acorn()
|
|
155
|
+
result = acorn.scurry("test_table")
|
|
156
|
+
|
|
157
|
+
# Verify DuckDB was called with correct S3 path
|
|
158
|
+
mock_duckdb_query.assert_called_once()
|
|
159
|
+
query_call = mock_duckdb_query.call_args[0][0]
|
|
160
|
+
self.assertIn(
|
|
161
|
+
"s3://aind-scratch-data/application-caches/zs_test_table.pqt",
|
|
162
|
+
query_call,
|
|
163
|
+
)
|
|
164
|
+
pd.testing.assert_frame_equal(result, expected_df)
|
|
165
|
+
|
|
166
|
+
@patch("zombie_squirrel.acorns.duckdb.query")
|
|
167
|
+
@patch("zombie_squirrel.acorns.boto3.client")
|
|
168
|
+
def test_s3_scurry_handles_error(
|
|
169
|
+
self, mock_boto3_client, mock_duckdb_query
|
|
170
|
+
):
|
|
171
|
+
"""Test S3Acorn.scurry returns empty DataFrame on error."""
|
|
172
|
+
mock_s3_client = MagicMock()
|
|
173
|
+
mock_boto3_client.return_value = mock_s3_client
|
|
174
|
+
mock_duckdb_query.side_effect = Exception("S3 access error")
|
|
175
|
+
|
|
176
|
+
acorn = S3Acorn()
|
|
177
|
+
result = acorn.scurry("nonexistent_table")
|
|
178
|
+
|
|
179
|
+
self.assertTrue(result.empty)
|
|
180
|
+
self.assertIsInstance(result, pd.DataFrame)
|
|
181
|
+
|
|
182
|
+
|
|
183
|
+
if __name__ == "__main__":
|
|
184
|
+
unittest.main()
|
|
@@ -204,7 +204,47 @@ class TestAssetBasics(unittest.TestCase):
|
|
|
204
204
|
|
|
205
205
|
@patch("zombie_squirrel.squirrels.ACORN", new_callable=MemoryAcorn)
|
|
206
206
|
@patch("zombie_squirrel.squirrels.MetadataDbClient")
|
|
207
|
-
def
|
|
207
|
+
def test_asset_basics_with_data_processes(
|
|
208
|
+
self, mock_client_class, mock_acorn
|
|
209
|
+
):
|
|
210
|
+
"""Test asset_basics includes process_date from data_processes."""
|
|
211
|
+
mock_client_instance = MagicMock()
|
|
212
|
+
mock_client_class.return_value = mock_client_instance
|
|
213
|
+
|
|
214
|
+
mock_client_instance.retrieve_docdb_records.return_value = [
|
|
215
|
+
{
|
|
216
|
+
"_id": "id1",
|
|
217
|
+
"_last_modified": "2023-01-01",
|
|
218
|
+
"data_description": {
|
|
219
|
+
"modalities": [{"abbreviation": "img"}],
|
|
220
|
+
"project_name": "proj1",
|
|
221
|
+
"data_level": "raw",
|
|
222
|
+
},
|
|
223
|
+
"subject": {"subject_id": "sub001"},
|
|
224
|
+
"acquisition": {
|
|
225
|
+
"acquisition_start_time": "2023-01-01T10:00:00",
|
|
226
|
+
"acquisition_end_time": "2023-01-01T11:00:00",
|
|
227
|
+
},
|
|
228
|
+
"processing": {
|
|
229
|
+
"data_processes": [
|
|
230
|
+
{"start_date_time": "2023-01-15T14:30:00"},
|
|
231
|
+
{"start_date_time": "2023-01-20T09:15:00"},
|
|
232
|
+
]
|
|
233
|
+
},
|
|
234
|
+
}
|
|
235
|
+
]
|
|
236
|
+
|
|
237
|
+
result = asset_basics()
|
|
238
|
+
|
|
239
|
+
self.assertEqual(len(result), 1)
|
|
240
|
+
self.assertEqual(result.iloc[0]["_id"], "id1")
|
|
241
|
+
self.assertEqual(result.iloc[0]["process_date"], "2023-01-20")
|
|
242
|
+
|
|
243
|
+
@patch("zombie_squirrel.squirrels.ACORN", new_callable=MemoryAcorn)
|
|
244
|
+
@patch("zombie_squirrel.squirrels.MetadataDbClient")
|
|
245
|
+
def test_asset_basics_incremental_update(
|
|
246
|
+
self, mock_client_class, mock_acorn
|
|
247
|
+
):
|
|
208
248
|
"""Test incremental cache update with partial data refresh."""
|
|
209
249
|
mock_client_instance = MagicMock()
|
|
210
250
|
mock_client_class.return_value = mock_client_instance
|
|
@@ -0,0 +1,54 @@
|
|
|
1
|
+
"""Unit tests for zombie_squirrel.utils module.
|
|
2
|
+
|
|
3
|
+
Tests for utility functions."""
|
|
4
|
+
|
|
5
|
+
import unittest
|
|
6
|
+
|
|
7
|
+
from zombie_squirrel.utils import get_s3_cache_path, prefix_table_name
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
class TestPrefixTableName(unittest.TestCase):
|
|
11
|
+
"""Tests for the prefix_table_name function."""
|
|
12
|
+
|
|
13
|
+
def test_prefix_table_name_basic(self):
|
|
14
|
+
"""Test that prefix_table_name adds 'zs_' prefix and '.pqt' ext."""
|
|
15
|
+
result = prefix_table_name("my_table")
|
|
16
|
+
self.assertEqual(result, "zs_my_table.pqt")
|
|
17
|
+
|
|
18
|
+
def test_prefix_table_name_empty_string(self):
|
|
19
|
+
"""Test with empty string."""
|
|
20
|
+
result = prefix_table_name("")
|
|
21
|
+
self.assertEqual(result, "zs_.pqt")
|
|
22
|
+
|
|
23
|
+
def test_prefix_table_name_single_char(self):
|
|
24
|
+
"""Test with single character."""
|
|
25
|
+
result = prefix_table_name("a")
|
|
26
|
+
self.assertEqual(result, "zs_a.pqt")
|
|
27
|
+
|
|
28
|
+
def test_prefix_table_name_with_underscores(self):
|
|
29
|
+
"""Test with table name containing underscores."""
|
|
30
|
+
result = prefix_table_name("my_long_table_name")
|
|
31
|
+
self.assertEqual(result, "zs_my_long_table_name.pqt")
|
|
32
|
+
|
|
33
|
+
def test_prefix_table_name_with_numbers(self):
|
|
34
|
+
"""Test with table name containing numbers."""
|
|
35
|
+
result = prefix_table_name("table123")
|
|
36
|
+
self.assertEqual(result, "zs_table123.pqt")
|
|
37
|
+
|
|
38
|
+
|
|
39
|
+
class TestGetS3CachePath(unittest.TestCase):
|
|
40
|
+
"""Tests for the get_s3_cache_path function."""
|
|
41
|
+
|
|
42
|
+
def test_get_s3_cache_path_basic(self):
|
|
43
|
+
"""Test that get_s3_cache_path constructs correct S3 path."""
|
|
44
|
+
result = get_s3_cache_path("zs_test.pqt")
|
|
45
|
+
self.assertEqual(result, "application-caches/zs_test.pqt")
|
|
46
|
+
|
|
47
|
+
def test_get_s3_cache_path_various_names(self):
|
|
48
|
+
"""Test with various filenames."""
|
|
49
|
+
result = get_s3_cache_path("zs_my_data.pqt")
|
|
50
|
+
self.assertEqual(result, "application-caches/zs_my_data.pqt")
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
if __name__ == "__main__":
|
|
54
|
+
unittest.main()
|
|
@@ -1,81 +0,0 @@
|
|
|
1
|
-
"""Storage backend interfaces for caching data."""
|
|
2
|
-
|
|
3
|
-
import logging
|
|
4
|
-
import os
|
|
5
|
-
from abc import ABC, abstractmethod
|
|
6
|
-
|
|
7
|
-
import pandas as pd
|
|
8
|
-
from aind_data_access_api.rds_tables import Client, RDSCredentials
|
|
9
|
-
|
|
10
|
-
from zombie_squirrel.utils import prefix_table_name
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
class Acorn(ABC):
|
|
14
|
-
"""Base class for a storage backend (the cache)."""
|
|
15
|
-
|
|
16
|
-
def __init__(self) -> None:
|
|
17
|
-
"""Initialize the Acorn."""
|
|
18
|
-
super().__init__()
|
|
19
|
-
|
|
20
|
-
@abstractmethod
|
|
21
|
-
def hide(self, table_name: str, data: pd.DataFrame) -> None:
|
|
22
|
-
"""Store records in the cache."""
|
|
23
|
-
pass # pragma: no cover
|
|
24
|
-
|
|
25
|
-
@abstractmethod
|
|
26
|
-
def scurry(self, table_name: str) -> pd.DataFrame:
|
|
27
|
-
"""Fetch records from the cache."""
|
|
28
|
-
pass # pragma: no cover
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
class RedshiftAcorn(Acorn):
|
|
32
|
-
"""Stores and retrieves caches using aind-data-access-api
|
|
33
|
-
Redshift Client"""
|
|
34
|
-
|
|
35
|
-
def __init__(self) -> None:
|
|
36
|
-
"""Initialize RedshiftAcorn with Redshift credentials."""
|
|
37
|
-
REDSHIFT_SECRETS = os.getenv("REDSHIFT_SECRETS", "/aind/prod/redshift/credentials/readwrite")
|
|
38
|
-
self.rds_client = Client(
|
|
39
|
-
credentials=RDSCredentials(aws_secrets_name=REDSHIFT_SECRETS),
|
|
40
|
-
)
|
|
41
|
-
|
|
42
|
-
def hide(self, table_name: str, data: pd.DataFrame) -> None:
|
|
43
|
-
"""Store DataFrame in Redshift table."""
|
|
44
|
-
self.rds_client.overwrite_table_with_df(
|
|
45
|
-
df=data,
|
|
46
|
-
table_name=prefix_table_name(table_name),
|
|
47
|
-
)
|
|
48
|
-
|
|
49
|
-
def scurry(self, table_name: str) -> pd.DataFrame:
|
|
50
|
-
"""Fetch DataFrame from Redshift table."""
|
|
51
|
-
return self.rds_client.read_table(table_name=prefix_table_name(table_name))
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
class MemoryAcorn(Acorn):
|
|
55
|
-
"""A simple in-memory backend for testing or local development."""
|
|
56
|
-
|
|
57
|
-
def __init__(self) -> None:
|
|
58
|
-
"""Initialize MemoryAcorn with empty store."""
|
|
59
|
-
super().__init__()
|
|
60
|
-
self._store: dict[str, pd.DataFrame] = {}
|
|
61
|
-
|
|
62
|
-
def hide(self, table_name: str, data: pd.DataFrame) -> None:
|
|
63
|
-
"""Store DataFrame in memory."""
|
|
64
|
-
self._store[table_name] = data
|
|
65
|
-
|
|
66
|
-
def scurry(self, table_name: str) -> pd.DataFrame:
|
|
67
|
-
"""Fetch DataFrame from memory."""
|
|
68
|
-
return self._store.get(table_name, pd.DataFrame())
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
def rds_get_handle_empty(acorn: Acorn, table_name: str) -> pd.DataFrame:
|
|
72
|
-
"""Helper for handling errors when loading from redshift, because
|
|
73
|
-
there's no helper function"""
|
|
74
|
-
try:
|
|
75
|
-
logging.info(f"Fetching from cache: {table_name}")
|
|
76
|
-
df = acorn.scurry(table_name)
|
|
77
|
-
except Exception as e:
|
|
78
|
-
logging.warning(f"Error fetching from cache: {e}")
|
|
79
|
-
df = pd.DataFrame()
|
|
80
|
-
|
|
81
|
-
return df
|
|
@@ -1,12 +0,0 @@
|
|
|
1
|
-
"""Utility functions for zombie-squirrel package."""
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
def prefix_table_name(table_name: str) -> str:
|
|
5
|
-
"""Add zombie-squirrel prefix to table names.
|
|
6
|
-
|
|
7
|
-
Args:
|
|
8
|
-
table_name: The base table name.
|
|
9
|
-
|
|
10
|
-
Returns:
|
|
11
|
-
Table name with 'zs_' prefix."""
|
|
12
|
-
return "zs_" + table_name
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
aind-data-access-api[docdb,rds]
|
|
@@ -1,217 +0,0 @@
|
|
|
1
|
-
"""Unit tests for zombie_squirrel.acorns module.
|
|
2
|
-
|
|
3
|
-
Tests for abstract base class, memory backend, and Redshift backend
|
|
4
|
-
for caching functionality."""
|
|
5
|
-
|
|
6
|
-
import os
|
|
7
|
-
import unittest
|
|
8
|
-
from unittest.mock import MagicMock, Mock, patch
|
|
9
|
-
|
|
10
|
-
import pandas as pd
|
|
11
|
-
|
|
12
|
-
from zombie_squirrel.acorns import (
|
|
13
|
-
Acorn,
|
|
14
|
-
MemoryAcorn,
|
|
15
|
-
RedshiftAcorn,
|
|
16
|
-
rds_get_handle_empty,
|
|
17
|
-
)
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
class TestAcornAbstractClass(unittest.TestCase):
|
|
21
|
-
"""Tests for Acorn abstract base class."""
|
|
22
|
-
|
|
23
|
-
def test_acorn_cannot_be_instantiated(self):
|
|
24
|
-
"""Test that Acorn abstract class cannot be instantiated."""
|
|
25
|
-
with self.assertRaises(TypeError):
|
|
26
|
-
Acorn()
|
|
27
|
-
|
|
28
|
-
def test_acorn_subclass_must_implement_hide(self):
|
|
29
|
-
"""Test that subclasses must implement hide method."""
|
|
30
|
-
|
|
31
|
-
class IncompleteAcorn(Acorn):
|
|
32
|
-
"""Incomplete Acorn subclass missing hide method."""
|
|
33
|
-
|
|
34
|
-
def scurry(self, table_name: str) -> pd.DataFrame: # pragma: no cover
|
|
35
|
-
"""Fetch records from the cache."""
|
|
36
|
-
return pd.DataFrame()
|
|
37
|
-
|
|
38
|
-
with self.assertRaises(TypeError):
|
|
39
|
-
IncompleteAcorn()
|
|
40
|
-
|
|
41
|
-
def test_acorn_subclass_must_implement_scurry(self):
|
|
42
|
-
"""Test that subclasses must implement scurry method."""
|
|
43
|
-
|
|
44
|
-
class IncompleteAcorn(Acorn):
|
|
45
|
-
"""Incomplete Acorn subclass missing scurry method."""
|
|
46
|
-
|
|
47
|
-
def hide(self, table_name: str, data: pd.DataFrame) -> None: # pragma: no cover
|
|
48
|
-
"""Store records in the cache."""
|
|
49
|
-
pass
|
|
50
|
-
|
|
51
|
-
with self.assertRaises(TypeError):
|
|
52
|
-
IncompleteAcorn()
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
class TestMemoryAcorn(unittest.TestCase):
|
|
56
|
-
"""Tests for MemoryAcorn implementation."""
|
|
57
|
-
|
|
58
|
-
def setUp(self):
|
|
59
|
-
"""Initialize a fresh MemoryAcorn for each test."""
|
|
60
|
-
self.acorn = MemoryAcorn()
|
|
61
|
-
|
|
62
|
-
def test_hide_and_scurry_basic(self):
|
|
63
|
-
"""Test basic hide and scurry operations."""
|
|
64
|
-
df = pd.DataFrame({"col1": [1, 2, 3], "col2": ["a", "b", "c"]})
|
|
65
|
-
self.acorn.hide("test_table", df)
|
|
66
|
-
|
|
67
|
-
retrieved = self.acorn.scurry("test_table")
|
|
68
|
-
pd.testing.assert_frame_equal(df, retrieved)
|
|
69
|
-
|
|
70
|
-
def test_scurry_empty_table(self):
|
|
71
|
-
"""Test scurrying a table that doesn't exist returns empty DataFrame."""
|
|
72
|
-
result = self.acorn.scurry("nonexistent_table")
|
|
73
|
-
self.assertTrue(result.empty)
|
|
74
|
-
self.assertIsInstance(result, pd.DataFrame)
|
|
75
|
-
|
|
76
|
-
def test_hide_overwrites_existing(self):
|
|
77
|
-
"""Test that hiding data overwrites existing data."""
|
|
78
|
-
df1 = pd.DataFrame({"col1": [1, 2, 3]})
|
|
79
|
-
df2 = pd.DataFrame({"col1": [4, 5, 6]})
|
|
80
|
-
|
|
81
|
-
self.acorn.hide("table", df1)
|
|
82
|
-
self.acorn.hide("table", df2)
|
|
83
|
-
|
|
84
|
-
retrieved = self.acorn.scurry("table")
|
|
85
|
-
pd.testing.assert_frame_equal(df2, retrieved)
|
|
86
|
-
|
|
87
|
-
def test_multiple_tables(self):
|
|
88
|
-
"""Test managing multiple tables."""
|
|
89
|
-
df1 = pd.DataFrame({"col1": [1, 2]})
|
|
90
|
-
df2 = pd.DataFrame({"col2": ["a", "b"]})
|
|
91
|
-
|
|
92
|
-
self.acorn.hide("table1", df1)
|
|
93
|
-
self.acorn.hide("table2", df2)
|
|
94
|
-
|
|
95
|
-
retrieved1 = self.acorn.scurry("table1")
|
|
96
|
-
retrieved2 = self.acorn.scurry("table2")
|
|
97
|
-
|
|
98
|
-
pd.testing.assert_frame_equal(df1, retrieved1)
|
|
99
|
-
pd.testing.assert_frame_equal(df2, retrieved2)
|
|
100
|
-
|
|
101
|
-
def test_hide_empty_dataframe(self):
|
|
102
|
-
"""Test hiding an empty DataFrame."""
|
|
103
|
-
df = pd.DataFrame()
|
|
104
|
-
self.acorn.hide("empty_table", df)
|
|
105
|
-
|
|
106
|
-
retrieved = self.acorn.scurry("empty_table")
|
|
107
|
-
pd.testing.assert_frame_equal(df, retrieved)
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
class TestRedshiftAcorn(unittest.TestCase):
|
|
111
|
-
"""Tests for RedshiftAcorn implementation with mocking."""
|
|
112
|
-
|
|
113
|
-
@patch("zombie_squirrel.acorns.RDSCredentials")
|
|
114
|
-
@patch("zombie_squirrel.acorns.Client")
|
|
115
|
-
def test_redshift_acorn_initialization(self, mock_client_class, mock_credentials_class):
|
|
116
|
-
"""Test RedshiftAcorn initialization."""
|
|
117
|
-
mock_client_instance = MagicMock()
|
|
118
|
-
mock_client_class.return_value = mock_client_instance
|
|
119
|
-
mock_credentials_instance = MagicMock()
|
|
120
|
-
mock_credentials_class.return_value = mock_credentials_instance
|
|
121
|
-
|
|
122
|
-
acorn = RedshiftAcorn()
|
|
123
|
-
|
|
124
|
-
self.assertEqual(acorn.rds_client, mock_client_instance)
|
|
125
|
-
mock_client_class.assert_called_once()
|
|
126
|
-
|
|
127
|
-
@patch("zombie_squirrel.acorns.RDSCredentials")
|
|
128
|
-
@patch("zombie_squirrel.acorns.Client")
|
|
129
|
-
def test_redshift_hide(self, mock_client_class, mock_credentials_class):
|
|
130
|
-
"""Test RedshiftAcorn.hide method."""
|
|
131
|
-
mock_client_instance = MagicMock()
|
|
132
|
-
mock_client_class.return_value = mock_client_instance
|
|
133
|
-
mock_credentials_instance = MagicMock()
|
|
134
|
-
mock_credentials_class.return_value = mock_credentials_instance
|
|
135
|
-
|
|
136
|
-
acorn = RedshiftAcorn()
|
|
137
|
-
df = pd.DataFrame({"col1": [1, 2, 3]})
|
|
138
|
-
|
|
139
|
-
acorn.hide("test_table", df)
|
|
140
|
-
|
|
141
|
-
mock_client_instance.overwrite_table_with_df.assert_called_once()
|
|
142
|
-
call_args = mock_client_instance.overwrite_table_with_df.call_args
|
|
143
|
-
pd.testing.assert_frame_equal(call_args[1]["df"], df)
|
|
144
|
-
self.assertEqual(call_args[1]["table_name"], "zs_test_table")
|
|
145
|
-
|
|
146
|
-
@patch("zombie_squirrel.acorns.RDSCredentials")
|
|
147
|
-
@patch("zombie_squirrel.acorns.Client")
|
|
148
|
-
def test_redshift_scurry(self, mock_client_class, mock_credentials_class):
|
|
149
|
-
"""Test RedshiftAcorn.scurry method."""
|
|
150
|
-
mock_client_instance = MagicMock()
|
|
151
|
-
expected_df = pd.DataFrame({"col1": [1, 2, 3]})
|
|
152
|
-
mock_client_instance.read_table.return_value = expected_df
|
|
153
|
-
mock_client_class.return_value = mock_client_instance
|
|
154
|
-
mock_credentials_instance = MagicMock()
|
|
155
|
-
mock_credentials_class.return_value = mock_credentials_instance
|
|
156
|
-
|
|
157
|
-
acorn = RedshiftAcorn()
|
|
158
|
-
result = acorn.scurry("test_table")
|
|
159
|
-
|
|
160
|
-
mock_client_instance.read_table.assert_called_once_with(table_name="zs_test_table")
|
|
161
|
-
pd.testing.assert_frame_equal(result, expected_df)
|
|
162
|
-
|
|
163
|
-
@patch.dict("os.environ", {}, clear=False)
|
|
164
|
-
@patch("zombie_squirrel.acorns.RDSCredentials")
|
|
165
|
-
@patch("zombie_squirrel.acorns.Client")
|
|
166
|
-
def test_redshift_default_secrets_path(self, mock_client_class, mock_credentials_class):
|
|
167
|
-
"""Test RedshiftAcorn uses default secrets path."""
|
|
168
|
-
if "REDSHIFT_SECRETS" in os.environ: # pragma: no cover
|
|
169
|
-
del os.environ["REDSHIFT_SECRETS"] # pragma: no cover
|
|
170
|
-
|
|
171
|
-
mock_client_instance = MagicMock()
|
|
172
|
-
mock_client_class.return_value = mock_client_instance
|
|
173
|
-
mock_credentials_instance = MagicMock()
|
|
174
|
-
mock_credentials_class.return_value = mock_credentials_instance
|
|
175
|
-
|
|
176
|
-
RedshiftAcorn()
|
|
177
|
-
|
|
178
|
-
mock_client_class.assert_called_once()
|
|
179
|
-
call_args = mock_client_class.call_args
|
|
180
|
-
self.assertIsNotNone(call_args)
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
class TestRdsGetHandleEmpty(unittest.TestCase):
|
|
184
|
-
"""Tests for rds_get_handle_empty helper function."""
|
|
185
|
-
|
|
186
|
-
def test_rds_get_handle_empty_success(self):
|
|
187
|
-
"""Test successful retrieval from acorn."""
|
|
188
|
-
acorn = MemoryAcorn()
|
|
189
|
-
df = pd.DataFrame({"col1": [1, 2, 3]})
|
|
190
|
-
acorn.hide("test_table", df)
|
|
191
|
-
|
|
192
|
-
result = rds_get_handle_empty(acorn, "test_table")
|
|
193
|
-
|
|
194
|
-
pd.testing.assert_frame_equal(result, df)
|
|
195
|
-
|
|
196
|
-
def test_rds_get_handle_empty_missing_table(self):
|
|
197
|
-
"""Test returns empty DataFrame when table is missing."""
|
|
198
|
-
acorn = MemoryAcorn()
|
|
199
|
-
|
|
200
|
-
result = rds_get_handle_empty(acorn, "nonexistent_table")
|
|
201
|
-
|
|
202
|
-
self.assertTrue(result.empty)
|
|
203
|
-
self.assertIsInstance(result, pd.DataFrame)
|
|
204
|
-
|
|
205
|
-
def test_rds_get_handle_empty_exception(self):
|
|
206
|
-
"""Test returns empty DataFrame when acorn raises exception."""
|
|
207
|
-
acorn = Mock(spec=["scurry"])
|
|
208
|
-
acorn.scurry.side_effect = Exception("Connection error")
|
|
209
|
-
|
|
210
|
-
result = rds_get_handle_empty(acorn, "test_table")
|
|
211
|
-
|
|
212
|
-
self.assertTrue(result.empty)
|
|
213
|
-
self.assertIsInstance(result, pd.DataFrame)
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
if __name__ == "__main__":
|
|
217
|
-
unittest.main()
|
|
@@ -1,40 +0,0 @@
|
|
|
1
|
-
"""Unit tests for zombie_squirrel.utils module.
|
|
2
|
-
|
|
3
|
-
Tests for utility functions."""
|
|
4
|
-
|
|
5
|
-
import unittest
|
|
6
|
-
|
|
7
|
-
from zombie_squirrel.utils import prefix_table_name
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
class TestPrefixTableName(unittest.TestCase):
|
|
11
|
-
"""Tests for the prefix_table_name function."""
|
|
12
|
-
|
|
13
|
-
def test_prefix_table_name_basic(self):
|
|
14
|
-
"""Test that prefix_table_name correctly adds 'zs_' prefix."""
|
|
15
|
-
result = prefix_table_name("my_table")
|
|
16
|
-
self.assertEqual(result, "zs_my_table")
|
|
17
|
-
|
|
18
|
-
def test_prefix_table_name_empty_string(self):
|
|
19
|
-
"""Test with empty string."""
|
|
20
|
-
result = prefix_table_name("")
|
|
21
|
-
self.assertEqual(result, "zs_")
|
|
22
|
-
|
|
23
|
-
def test_prefix_table_name_single_char(self):
|
|
24
|
-
"""Test with single character."""
|
|
25
|
-
result = prefix_table_name("a")
|
|
26
|
-
self.assertEqual(result, "zs_a")
|
|
27
|
-
|
|
28
|
-
def test_prefix_table_name_with_underscores(self):
|
|
29
|
-
"""Test with table name containing underscores."""
|
|
30
|
-
result = prefix_table_name("my_long_table_name")
|
|
31
|
-
self.assertEqual(result, "zs_my_long_table_name")
|
|
32
|
-
|
|
33
|
-
def test_prefix_table_name_with_numbers(self):
|
|
34
|
-
"""Test with table name containing numbers."""
|
|
35
|
-
result = prefix_table_name("table123")
|
|
36
|
-
self.assertEqual(result, "zs_table123")
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
if __name__ == "__main__":
|
|
40
|
-
unittest.main()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{zombie_squirrel-0.5.0 → zombie_squirrel-0.5.2}/src/zombie_squirrel.egg-info/dependency_links.txt
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|