zombie-squirrel 0.2.3__py3-none-any.whl → 0.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,5 +1,5 @@
1
1
  """Init package"""
2
- __version__ = "0.2.3"
2
+ __version__ = "0.3.0"
3
3
 
4
4
  from zombie_squirrel.squirrels import (
5
5
  unique_project_names,
zombie_squirrel/acorns.py CHANGED
@@ -59,7 +59,8 @@ class MemoryAcorn(Acorn):
59
59
 
60
60
 
61
61
  def rds_get_handle_empty(acorn: Acorn, table_name: str) -> pd.DataFrame:
62
- """Utility function for testing purposes."""
62
+ """Helper for handling errors when loading from redshift, because
63
+ there's no helper function """
63
64
  try:
64
65
  logging.info(f"Fetching from cache: {table_name}")
65
66
  df = acorn.scurry(table_name)
@@ -37,6 +37,7 @@ def register_squirrel(name: str):
37
37
  NAMES = {
38
38
  "upn": "unique_project_names",
39
39
  "usi": "unique_subject_ids",
40
+ "basics": "asset_basics",
40
41
  }
41
42
 
42
43
 
@@ -84,3 +85,61 @@ def unique_subject_ids(force_update: bool = False) -> list[str]:
84
85
  ACORN.hide(NAMES["usi"], df)
85
86
 
86
87
  return df["subject_id"].tolist()
88
+
89
+
90
@register_squirrel(NAMES["basics"])
def asset_basics(force_update: bool = False) -> pd.DataFrame:
    """Return basic metadata for assets, refreshing the cache when needed.

    The cached table is loaded first. If it is non-empty and ``force_update``
    is false, it is returned unchanged. Otherwise every record's ``_id`` and
    ``last_modified`` are pulled from DocDB, and only records that are new or
    whose ``last_modified`` differs from the cached value are re-fetched (in
    batches) with the following projected fields:

    ``_id``, ``last_modified``, ``data_description.modalities``,
    ``data_description.project_name``, ``data_description.data_level``,
    ``subject.subject_id``, ``acquisition.acquisition_start_time`` and
    ``acquisition.acquisition_end_time``.

    The merged table is written back to the cache before being returned.

    Args:
        force_update: Run the refresh even if the cache is non-empty. Records
            whose ``last_modified`` matches the cache are still not re-fetched.

    Returns:
        DataFrame built from the cached rows plus the re-fetched records.
        NOTE(review): the column layout depends on how the client returns
        dotted projections (presumably nested dicts) -- confirm.
    """
    df = rds_get_handle_empty(ACORN, NAMES["basics"])

    if not (df.empty or force_update):
        return df

    fields = [
        "data_description.modalities",
        "data_description.project_name",
        "data_description.data_level",
        "subject.subject_id",
        "acquisition.acquisition_start_time",
        "acquisition.acquisition_end_time",
    ]
    batch_size = 100

    logging.info("Updating cache for asset basics")
    client = MetadataDbClient(
        host=API_GATEWAY_HOST,
        version="v2",
    )

    # It's expensive to pull several un-indexed fields from a database as
    # large as DocDB, so first fetch only the cheap (_id, last_modified)
    # pairs and use them to decide which records actually need refreshing.
    record_ids = client.retrieve_docdb_records(
        filter_query={},
        projection={"_id": 1, "last_modified": 1},
        limit=0,
    )

    # An empty cache may come back without any columns at all; indexing
    # df["_id"] on such a frame raises KeyError, so only consult the cache
    # when both bookkeeping columns are actually present.
    cache_usable = {"_id", "last_modified"}.issubset(df.columns)

    # One pass over the cache gives O(1) lookups per DocDB record instead of
    # a full boolean-mask scan of the frame for every record.
    cached_modified = {}
    if cache_usable:
        for cached_id, modified in zip(
            df["_id"].values, df["last_modified"].values
        ):
            # setdefault keeps the first occurrence if an _id is duplicated.
            cached_modified.setdefault(cached_id, modified)

    # Keep every _id that is new or whose last_modified differs from cache.
    not_cached = object()
    keep_ids = [
        record["_id"]
        for record in record_ids
        if cached_modified.get(record["_id"], not_cached)
        != record.get("last_modified")
    ]

    # Batch the detailed fetch to avoid overloading the server.
    projection = dict.fromkeys(fields + ["_id", "last_modified"], 1)
    asset_records = []
    for start in range(0, len(keep_ids), batch_size):
        batch_ids = keep_ids[start:start + batch_size]
        asset_records.extend(
            client.retrieve_docdb_records(
                filter_query={"_id": {"$in": batch_ids}},
                projection=projection,
                limit=0,
            )
        )

    # Replace outdated cached rows with the freshly fetched ones.
    # NOTE(review): rows whose _id no longer exists in DocDB are never
    # removed from the cache here -- confirm whether that is intended.
    new_df = pd.DataFrame(asset_records)
    if cache_usable:
        unchanged = df[~df["_id"].isin(keep_ids)]
        df = pd.concat([unchanged, new_df], ignore_index=True)
    else:
        df = new_df

    ACORN.hide(NAMES["basics"], df)

    return df
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zombie-squirrel
3
- Version: 0.2.3
3
+ Version: 0.3.0
4
4
  Summary: Generated from aind-library-template
5
5
  Author: Allen Institute for Neural Dynamics
6
6
  License: MIT
@@ -0,0 +1,10 @@
1
+ zombie_squirrel/__init__.py,sha256=SU-iCf1qQAQUKc6VnAdEUcGBlcUwuDZ3CKpJXGSrzb0,134
2
+ zombie_squirrel/acorns.py,sha256=1mCnWCDFRnbHLddCCgiUG3RumuKUjMKVbyTVoYI0FB8,2188
3
+ zombie_squirrel/squirrels.py,sha256=BkS4l9cUB7ZXQV3ySa0HCgSLei_Kc47X28BJY69Bz54,4915
4
+ zombie_squirrel/sync.py,sha256=jslTVIend5Z-sLJuNXKkhn-nqmKK_P0FAiRuFFYRnto,168
5
+ zombie_squirrel/utils.py,sha256=74DSFK1Qbp8yQeUXpnli4kqx_QcAc8v4_6FZut0xZ8g,103
6
+ zombie_squirrel-0.3.0.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
7
+ zombie_squirrel-0.3.0.dist-info/METADATA,sha256=3YrkG4IV-36OekD-DEjPlxAbVOXvuHzjesCarDgJ2eQ,1382
8
+ zombie_squirrel-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ zombie_squirrel-0.3.0.dist-info/top_level.txt,sha256=FmM0coe4AangURZLjM4JwwRv2B8H6oINYCoZLKLDCKA,16
10
+ zombie_squirrel-0.3.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- zombie_squirrel/__init__.py,sha256=i7J51pQP-ID_TrBZtNQmxjgzlqJ8WGQIObfHAzospx8,134
2
- zombie_squirrel/acorns.py,sha256=MZaScwDpnuuGnrx8a1vRmfv5-fr6h4Idw1_rQ2FWdB0,2132
3
- zombie_squirrel/squirrels.py,sha256=3ybJQpuNsoM8gBkHSWOKBZ_zOfsnzq35TKh0Aig2voc,2662
4
- zombie_squirrel/sync.py,sha256=jslTVIend5Z-sLJuNXKkhn-nqmKK_P0FAiRuFFYRnto,168
5
- zombie_squirrel/utils.py,sha256=74DSFK1Qbp8yQeUXpnli4kqx_QcAc8v4_6FZut0xZ8g,103
6
- zombie_squirrel-0.2.3.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
7
- zombie_squirrel-0.2.3.dist-info/METADATA,sha256=o82tnKDGfBAcDwt3fiSpF14octxGxKym5BmluyTyTg0,1382
8
- zombie_squirrel-0.2.3.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- zombie_squirrel-0.2.3.dist-info/top_level.txt,sha256=FmM0coe4AangURZLjM4JwwRv2B8H6oINYCoZLKLDCKA,16
10
- zombie_squirrel-0.2.3.dist-info/RECORD,,