zombie-squirrel 0.3.0__py3-none-any.whl → 0.4.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- zombie_squirrel/__init__.py +2 -1
- zombie_squirrel/squirrels.py +29 -6
- {zombie_squirrel-0.3.0.dist-info → zombie_squirrel-0.4.0.dist-info}/METADATA +1 -1
- zombie_squirrel-0.4.0.dist-info/RECORD +10 -0
- zombie_squirrel-0.3.0.dist-info/RECORD +0 -10
- {zombie_squirrel-0.3.0.dist-info → zombie_squirrel-0.4.0.dist-info}/WHEEL +0 -0
- {zombie_squirrel-0.3.0.dist-info → zombie_squirrel-0.4.0.dist-info}/licenses/LICENSE +0 -0
- {zombie_squirrel-0.3.0.dist-info → zombie_squirrel-0.4.0.dist-info}/top_level.txt +0 -0
zombie_squirrel/__init__.py
CHANGED
zombie_squirrel/squirrels.py
CHANGED
|
@@ -91,7 +91,7 @@ def unique_subject_ids(force_update: bool = False) -> list[str]:
|
|
|
91
91
|
def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
92
92
|
"""Basic asset metadata.
|
|
93
93
|
|
|
94
|
-
_id,
|
|
94
|
+
_id, _last_modified,
|
|
95
95
|
modalities, project names, data_level, subject_id, acquisition_start and _end
|
|
96
96
|
"""
|
|
97
97
|
df = rds_get_handle_empty(ACORN, NAMES["basics"])
|
|
@@ -107,6 +107,9 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
107
107
|
|
|
108
108
|
if df.empty or force_update:
|
|
109
109
|
logging.info("Updating cache for asset basics")
|
|
110
|
+
df = pd.DataFrame(columns=["_id", "_last_modified", "modalities", "project_name",
|
|
111
|
+
"data_level", "subject_id",
|
|
112
|
+
"acquisition_start_time", "acquisition_end_time"])
|
|
110
113
|
client = MetadataDbClient(
|
|
111
114
|
host=API_GATEWAY_HOST,
|
|
112
115
|
version="v2",
|
|
@@ -115,29 +118,49 @@ def asset_basics(force_update: bool = False) -> pd.DataFrame:
|
|
|
115
118
|
# as large as DocDB. We'll also try to limit ourselves to only updating fields
|
|
116
119
|
# that are necessary
|
|
117
120
|
record_ids = client.retrieve_docdb_records(
|
|
118
|
-
filter_query={}, projection={"_id": 1, "
|
|
121
|
+
filter_query={}, projection={"_id": 1, "_last_modified": 1}, limit=0,
|
|
119
122
|
)
|
|
120
123
|
keep_ids = []
|
|
121
|
-
# Drop all _ids where
|
|
124
|
+
# Drop all _ids where _last_modified matches cache
|
|
122
125
|
for record in record_ids:
|
|
123
126
|
cached_row = df[df["_id"] == record["_id"]]
|
|
124
|
-
if cached_row.empty or cached_row["
|
|
127
|
+
if cached_row.empty or cached_row["_last_modified"].values[0] != record["_last_modified"]:
|
|
125
128
|
keep_ids.append(record["_id"])
|
|
126
129
|
|
|
127
130
|
# Now batch by 100 IDs at a time to avoid overloading server, and fetch all the fields
|
|
128
131
|
BATCH_SIZE = 100
|
|
129
132
|
asset_records = []
|
|
130
133
|
for i in range(0, len(keep_ids), BATCH_SIZE):
|
|
134
|
+
logging.info(f"Fetching asset basics batch {i // BATCH_SIZE + 1}...")
|
|
131
135
|
batch_ids = keep_ids[i:i + BATCH_SIZE]
|
|
132
136
|
batch_records = client.retrieve_docdb_records(
|
|
133
137
|
filter_query={"_id": {"$in": batch_ids}},
|
|
134
|
-
projection={field: 1 for field in FIELDS + ["_id", "
|
|
138
|
+
projection={field: 1 for field in FIELDS + ["_id", "_last_modified"]},
|
|
135
139
|
limit=0,
|
|
136
140
|
)
|
|
137
141
|
asset_records.extend(batch_records)
|
|
142
|
+
|
|
143
|
+
# Unwrap nested fields
|
|
144
|
+
records = []
|
|
145
|
+
for record in asset_records:
|
|
146
|
+
|
|
147
|
+
modalities = record.get("data_description", {}).get("modalities", [])
|
|
148
|
+
modality_abbreviations = [modality["abbreviation"] for modality in modalities if "abbreviation" in modality]
|
|
149
|
+
modality_abbreviations_str = ", ".join(modality_abbreviations)
|
|
150
|
+
flat_record = {
|
|
151
|
+
"_id": record["_id"],
|
|
152
|
+
"_last_modified": record.get("_last_modified", None),
|
|
153
|
+
"modalities": modality_abbreviations_str,
|
|
154
|
+
"project_name": record.get("data_description", {}).get("project_name", None),
|
|
155
|
+
"data_level": record.get("data_description", {}).get("data_level", None),
|
|
156
|
+
"subject_id": record.get("subject", {}).get("subject_id", None),
|
|
157
|
+
"acquisition_start_time": record.get("acquisition", {}).get("acquisition_start_time", None),
|
|
158
|
+
"acquisition_end_time": record.get("acquisition", {}).get("acquisition_end_time", None),
|
|
159
|
+
}
|
|
160
|
+
records.append(flat_record)
|
|
138
161
|
|
|
139
162
|
# Combine new records with the old df and store in cache
|
|
140
|
-
new_df = pd.DataFrame(
|
|
163
|
+
new_df = pd.DataFrame(records)
|
|
141
164
|
df = pd.concat([df[df["_id"].isin(keep_ids) == False], new_df], ignore_index=True)
|
|
142
165
|
|
|
143
166
|
ACORN.hide(NAMES["basics"], df)
|
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
zombie_squirrel/__init__.py,sha256=8er6wgFVb0XMkMDsmLRvR_YeO1E_sL3KaOJN9VXXwOw,152
|
|
2
|
+
zombie_squirrel/acorns.py,sha256=1mCnWCDFRnbHLddCCgiUG3RumuKUjMKVbyTVoYI0FB8,2188
|
|
3
|
+
zombie_squirrel/squirrels.py,sha256=Ln8tsa51rK6d2rpOIktSAeHYX3sYMXr3o4njZzAujAo,6340
|
|
4
|
+
zombie_squirrel/sync.py,sha256=jslTVIend5Z-sLJuNXKkhn-nqmKK_P0FAiRuFFYRnto,168
|
|
5
|
+
zombie_squirrel/utils.py,sha256=74DSFK1Qbp8yQeUXpnli4kqx_QcAc8v4_6FZut0xZ8g,103
|
|
6
|
+
zombie_squirrel-0.4.0.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
|
|
7
|
+
zombie_squirrel-0.4.0.dist-info/METADATA,sha256=0Rv7O3SRGDe06_F4-Kefj9JxC2xMQG1m1l3BYrZyfUE,1382
|
|
8
|
+
zombie_squirrel-0.4.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
+
zombie_squirrel-0.4.0.dist-info/top_level.txt,sha256=FmM0coe4AangURZLjM4JwwRv2B8H6oINYCoZLKLDCKA,16
|
|
10
|
+
zombie_squirrel-0.4.0.dist-info/RECORD,,
|
|
@@ -1,10 +0,0 @@
|
|
|
1
|
-
zombie_squirrel/__init__.py,sha256=SU-iCf1qQAQUKc6VnAdEUcGBlcUwuDZ3CKpJXGSrzb0,134
|
|
2
|
-
zombie_squirrel/acorns.py,sha256=1mCnWCDFRnbHLddCCgiUG3RumuKUjMKVbyTVoYI0FB8,2188
|
|
3
|
-
zombie_squirrel/squirrels.py,sha256=BkS4l9cUB7ZXQV3ySa0HCgSLei_Kc47X28BJY69Bz54,4915
|
|
4
|
-
zombie_squirrel/sync.py,sha256=jslTVIend5Z-sLJuNXKkhn-nqmKK_P0FAiRuFFYRnto,168
|
|
5
|
-
zombie_squirrel/utils.py,sha256=74DSFK1Qbp8yQeUXpnli4kqx_QcAc8v4_6FZut0xZ8g,103
|
|
6
|
-
zombie_squirrel-0.3.0.dist-info/licenses/LICENSE,sha256=U0Y7B3gZJHXpjJVLgTQjM8e_c8w4JJpLgGhIdsoFR1Y,1092
|
|
7
|
-
zombie_squirrel-0.3.0.dist-info/METADATA,sha256=3YrkG4IV-36OekD-DEjPlxAbVOXvuHzjesCarDgJ2eQ,1382
|
|
8
|
-
zombie_squirrel-0.3.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
9
|
-
zombie_squirrel-0.3.0.dist-info/top_level.txt,sha256=FmM0coe4AangURZLjM4JwwRv2B8H6oINYCoZLKLDCKA,16
|
|
10
|
-
zombie_squirrel-0.3.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|