zombie-squirrel 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,7 +1,8 @@
1
1
  """Init package"""
2
- __version__ = "0.3.0"
2
+ __version__ = "0.4.0"
3
3
 
4
4
  from zombie_squirrel.squirrels import (
5
5
  unique_project_names,
6
6
  unique_subject_ids,
7
+ asset_basics,
7
8
  )
@@ -91,7 +91,7 @@ def unique_subject_ids(force_update: bool = False) -> list[str]:
91
91
  def asset_basics(force_update: bool = False) -> pd.DataFrame:
92
92
  """Basic asset metadata.
93
93
 
94
- _id, last_modified,
94
+ _id, _last_modified,
95
95
  modalities, project names, data_level, subject_id, acquisition_start and _end
96
96
  """
97
97
  df = rds_get_handle_empty(ACORN, NAMES["basics"])
@@ -107,6 +107,9 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
107
107
 
108
108
  if df.empty or force_update:
109
109
  logging.info("Updating cache for asset basics")
110
+ df = pd.DataFrame(columns=["_id", "_last_modified", "modalities", "project_name",
111
+ "data_level", "subject_id",
112
+ "acquisition_start_time", "acquisition_end_time"])
110
113
  client = MetadataDbClient(
111
114
  host=API_GATEWAY_HOST,
112
115
  version="v2",
@@ -115,29 +118,49 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
115
118
  # as large as DocDB. We'll also try to limit ourselves to only updating fields
116
119
  # that are necessary
117
120
  record_ids = client.retrieve_docdb_records(
118
- filter_query={}, projection={"_id": 1, "last_modified": 1}, limit=0,
121
+ filter_query={}, projection={"_id": 1, "_last_modified": 1}, limit=0,
119
122
  )
120
123
  keep_ids = []
121
- # Drop all _ids where last_modified matches cache
124
+ # Drop all _ids where _last_modified matches cache
122
125
  for record in record_ids:
123
126
  cached_row = df[df["_id"] == record["_id"]]
124
- if cached_row.empty or cached_row["last_modified"].values[0] != record["last_modified"]:
127
+ if cached_row.empty or cached_row["_last_modified"].values[0] != record["_last_modified"]:
125
128
  keep_ids.append(record["_id"])
126
129
 
127
130
  # Now batch by 100 IDs at a time to avoid overloading server, and fetch all the fields
128
131
  BATCH_SIZE = 100
129
132
  asset_records = []
130
133
  for i in range(0, len(keep_ids), BATCH_SIZE):
134
+ logging.info(f"Fetching asset basics batch {i // BATCH_SIZE + 1}...")
131
135
  batch_ids = keep_ids[i:i + BATCH_SIZE]
132
136
  batch_records = client.retrieve_docdb_records(
133
137
  filter_query={"_id": {"$in": batch_ids}},
134
- projection={field: 1 for field in FIELDS + ["_id", "last_modified"]},
138
+ projection={field: 1 for field in FIELDS + ["_id", "_last_modified"]},
135
139
  limit=0,
136
140
  )
137
141
  asset_records.extend(batch_records)
142
+
143
+ # Unwrap nested fields
144
+ records = []
145
+ for record in asset_records:
146
+
147
+ modalities = record.get("data_description", {}).get("modalities", [])
148
+ modality_abbreviations = [modality["abbreviation"] for modality in modalities if "abbreviation" in modality]
149
+ modality_abbreviations_str = ", ".join(modality_abbreviations)
150
+ flat_record = {
151
+ "_id": record["_id"],
152
+ "_last_modified": record.get("_last_modified", None),
153
+ "modalities": modality_abbreviations_str,
154
+ "project_name": record.get("data_description", {}).get("project_name", None),
155
+ "data_level": record.get("data_description", {}).get("data_level", None),
156
+ "subject_id": record.get("subject", {}).get("subject_id", None),
157
+ "acquisition_start_time": record.get("acquisition", {}).get("acquisition_start_time", None),
158
+ "acquisition_end_time": record.get("acquisition", {}).get("acquisition_end_time", None),
159
+ }
160
+ records.append(flat_record)
138
161
 
139
162
  # Combine new records with the old df and store in cache
140
- new_df = pd.DataFrame(asset_records)
163
+ new_df = pd.DataFrame(records)
141
164
  df = pd.concat([df[df["_id"].isin(keep_ids) == False], new_df], ignore_index=True)
142
165
 
143
166
  ACORN.hide(NAMES["basics"], df)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: zombie-squirrel
3
- Version: 0.3.0
3
+ Version: 0.4.0
4
4
  Summary: Generated from aind-library-template
5
5
  Author: Allen Institute for Neural Dynamics
6
6
  License: MIT
@@ -0,0 +1,10 @@
1
+ zombie_squirrel/__init__.py,sha256=8er6wgFVb0XMkMDsmLRvR_YeO1E_sL3KaOJN9VXXwOw,152
2
+ zombie_squirrel/acorns.py,sha256=1mCnWCDFRnbHLddCCgiUG3RumuKUjMKVbyTVoYI0FB8,2188
3
+ zombie_squirrel/squirrels.py,sha256=Ln8tsa51rK6d2rpOIktSAeHYX3sYMXr3o4njZzAujAo,6340
4
+ zombie_squirrel/sync.py,sha256=jslTVIend5Z-sLJuNXKkhn-nqmKK_P0FAiRuFFYRnto,168
5
+ zombie_squirrel/utils.py,sha256=74DSFK1Qbp8yQeUXpnli4kqx_QcAc8v4_6FZut0xZ8g,103
6
+ zombie_squirrel-0.4.0.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
7
+ zombie_squirrel-0.4.0.dist-info/METADATA,sha256=0Rv7O3SRGDe06_F4-Kefj9JxC2xMQG1m1l3BYrZyfUE,1382
8
+ zombie_squirrel-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
+ zombie_squirrel-0.4.0.dist-info/top_level.txt,sha256=FmM0coe4AangURZLjM4JwwRv2B8H6oINYCoZLKLDCKA,16
10
+ zombie_squirrel-0.4.0.dist-info/RECORD,,
@@ -1,10 +0,0 @@
1
- zombie_squirrel/__init__.py,sha256=SU-iCf1qQAQUKc6VnAdEUcGBlcUwuDZ3CKpJXGSrzb0,134
2
- zombie_squirrel/acorns.py,sha256=1mCnWCDFRnbHLddCCgiUG3RumuKUjMKVbyTVoYI0FB8,2188
3
- zombie_squirrel/squirrels.py,sha256=BkS4l9cUB7ZXQV3ySa0HCgSLei_Kc47X28BJY69Bz54,4915
4
- zombie_squirrel/sync.py,sha256=jslTVIend5Z-sLJuNXKkhn-nqmKK_P0FAiRuFFYRnto,168
5
- zombie_squirrel/utils.py,sha256=74DSFK1Qbp8yQeUXpnli4kqx_QcAc8v4_6FZut0xZ8g,103
6
- zombie_squirrel-0.3.0.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
7
- zombie_squirrel-0.3.0.dist-info/METADATA,sha256=3YrkG4IV-36OekD-DEjPlxAbVOXvuHzjesCarDgJ2eQ,1382
8
- zombie_squirrel-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
9
- zombie_squirrel-0.3.0.dist-info/top_level.txt,sha256=FmM0coe4AangURZLjM4JwwRv2B8H6oINYCoZLKLDCKA,16
10
- zombie_squirrel-0.3.0.dist-info/RECORD,,