swcgeom 0.14.0__py3-none-any.whl → 0.16.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of swcgeom might be problematic. Click here for more details.
- swcgeom/_version.py +2 -2
- swcgeom/analysis/lmeasure.py +821 -0
- swcgeom/analysis/sholl.py +31 -2
- swcgeom/core/__init__.py +4 -0
- swcgeom/core/branch.py +9 -4
- swcgeom/core/branch_tree.py +2 -3
- swcgeom/core/{segment.py → compartment.py} +14 -9
- swcgeom/core/node.py +0 -8
- swcgeom/core/path.py +21 -6
- swcgeom/core/population.py +42 -3
- swcgeom/core/swc_utils/assembler.py +20 -138
- swcgeom/core/swc_utils/base.py +12 -5
- swcgeom/core/swc_utils/checker.py +12 -2
- swcgeom/core/swc_utils/subtree.py +2 -2
- swcgeom/core/tree.py +53 -49
- swcgeom/core/tree_utils.py +27 -5
- swcgeom/core/tree_utils_impl.py +22 -6
- swcgeom/images/augmentation.py +6 -1
- swcgeom/images/contrast.py +107 -0
- swcgeom/images/folder.py +111 -29
- swcgeom/images/io.py +79 -40
- swcgeom/transforms/__init__.py +2 -0
- swcgeom/transforms/base.py +41 -21
- swcgeom/transforms/branch.py +5 -5
- swcgeom/transforms/geometry.py +42 -18
- swcgeom/transforms/image_preprocess.py +100 -0
- swcgeom/transforms/image_stack.py +46 -28
- swcgeom/transforms/images.py +76 -6
- swcgeom/transforms/mst.py +10 -18
- swcgeom/transforms/neurolucida_asc.py +495 -0
- swcgeom/transforms/population.py +2 -2
- swcgeom/transforms/tree.py +12 -14
- swcgeom/transforms/tree_assembler.py +85 -19
- swcgeom/utils/__init__.py +1 -0
- swcgeom/utils/neuromorpho.py +425 -300
- swcgeom/utils/numpy_helper.py +14 -4
- swcgeom/utils/plotter_2d.py +130 -0
- swcgeom/utils/renderer.py +28 -139
- swcgeom/utils/sdf.py +5 -1
- {swcgeom-0.14.0.dist-info → swcgeom-0.16.0.dist-info}/METADATA +3 -3
- swcgeom-0.16.0.dist-info/RECORD +67 -0
- {swcgeom-0.14.0.dist-info → swcgeom-0.16.0.dist-info}/WHEEL +1 -1
- swcgeom-0.14.0.dist-info/RECORD +0 -62
- {swcgeom-0.14.0.dist-info → swcgeom-0.16.0.dist-info}/LICENSE +0 -0
- {swcgeom-0.14.0.dist-info → swcgeom-0.16.0.dist-info}/top_level.txt +0 -0
swcgeom/utils/neuromorpho.py
CHANGED
|
@@ -81,7 +81,9 @@ import logging
|
|
|
81
81
|
import math
|
|
82
82
|
import os
|
|
83
83
|
import urllib.parse
|
|
84
|
-
from typing import Any, Callable, Dict, Iterable, List, Optional
|
|
84
|
+
from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple
|
|
85
|
+
|
|
86
|
+
from tqdm import tqdm
|
|
85
87
|
|
|
86
88
|
from swcgeom.utils import FileReader
|
|
87
89
|
|
|
@@ -89,24 +91,39 @@ __all__ = [
|
|
|
89
91
|
"neuromorpho_is_valid",
|
|
90
92
|
"neuromorpho_convert_lmdb_to_swc",
|
|
91
93
|
"download_neuromorpho",
|
|
94
|
+
"NeuroMorpho",
|
|
92
95
|
]
|
|
93
96
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
97
|
+
|
|
98
|
+
# Base address of the site; relative URLs below are joined onto it.
URL_BASE = "https://neuromorpho.org"
# REST endpoint that serves paged neuron metadata.
URL_METADATA = "api/neuron"
# URL templates: `$ARCHIVE`, `$NEURON` and `$EXT` are placeholders that are
# substituted per neuron before the request is made.
URL_MORPHO_CNG = "dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
URL_MORPHO_SOURCE = "dableFiles/$ARCHIVE/Source-Version/$NEURON.$EXT"
URL_LOG_CNG = "dableFiles/$ARCHIVE/Remaining%20issues/$NEURON.CNG.swc.std"
URL_LOG_SOURCE = "dableFiles/$ARCHIVE/Standardization%20log/$NEURON.std"
# Default (and largest) page size used when paging through the metadata API.
API_PAGE_SIZE_MAX = 500

# Byte-size units.
KB = 1024
MB = 1024 * KB
GB = 1024 * MB

# Test version: 8.5.25 (2023-08-01)
# No ETAs for future version
# Size of metadata about 0.5 GB
# Size of morpho_cng about 18 GB
# Not sure about the size of others
SIZE_METADATA = 2 * GB
SIZE_DATA = 20 * GB

# Names of the downloadable resource kinds; each name is also used as the
# directory name of the resource's LMDB database.
RESOURCES = Literal["morpho_cng", "morpho_source", "log_cng", "log_source"]
DOWNLOAD_CONFIGS: Dict[RESOURCES, Tuple[str, int]] = {
    # name/path: (url, size)
    "morpho_cng": (URL_MORPHO_CNG, 20 * GB),
    # Fixed: this entry previously pointed at URL_LOG_CNG, so asking for the
    # source morphologies fetched the CNG "Remaining issues" logs instead.
    "morpho_source": (URL_MORPHO_SOURCE, 512 * GB),
    "log_cng": (URL_LOG_CNG, 512 * GB),
    "log_source": (URL_LOG_SOURCE, 512 * GB),
}
|
|
126
|
+
|
|
110
127
|
# fmt:off
|
|
111
128
|
# Test version: 8.5.25 (2023-08-01)
|
|
112
129
|
# No ETAs for future version
|
|
@@ -132,316 +149,416 @@ def neuromorpho_is_valid(metadata: Dict[str, Any]) -> bool:
|
|
|
132
149
|
return metadata["neuron_id"] not in invalid_ids
|
|
133
150
|
|
|
134
151
|
|
|
135
|
-
# pylint: disable-next=too-many-locals
|
|
136
152
|
def neuromorpho_convert_lmdb_to_swc(
    root: str,
    dest: Optional[str] = None,
    *,
    verbose: bool = False,
    **kwargs,
) -> None:
    """Convert the downloaded lmdb data under `root` to SWC files.

    Function-style shortcut: builds a `NeuroMorpho` for `root` and calls
    its `convert_lmdb_to_swc` method.

    Parameters
    ----------
    root : str
        Root directory handed to `NeuroMorpho`.
    dest : str, optional
        Output directory, passed through to
        `NeuroMorpho.convert_lmdb_to_swc`.
    verbose : bool, default False
        Passed to the `NeuroMorpho` constructor.
    **kwargs :
        Forwarding to `NeuroMorpho.convert_lmdb_to_swc`.
    """
    NeuroMorpho(root, verbose=verbose).convert_lmdb_to_swc(dest, **kwargs)
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
def download_neuromorpho(path: str, *, verbose: bool = False, **kwargs) -> None:
    """Download neuromorpho.org data into `path`.

    Function-style shortcut: builds a `NeuroMorpho` rooted at `path` and
    calls its `download` method.

    Parameters
    ----------
    path : str
        Root directory handed to `NeuroMorpho`.
    verbose : bool, default False
        Passed to the `NeuroMorpho` constructor.
    **kwargs :
        Forwarding to `NeuroMorpho.download`.
    """
    NeuroMorpho(path, verbose=verbose).download(**kwargs)
|
|
162
|
+
|
|
163
|
+
|
|
164
|
+
class NeuroMorpho:
    """Downloader and converter for data hosted on neuromorpho.org.

    Downloaded data is kept in LMDB databases below `root`: one named
    `metadata` plus one per downloaded resource (named after the resource).
    """

    def __init__(
        self, root: str, *, url_base: str = URL_BASE, verbose: bool = False
    ) -> None:
        """
        Parameters
        ----------
        root : str
            Directory that holds the lmdb databases.
        url_base : str, default `URL_BASE`
            Base that relative request URLs are joined onto.
        verbose : bool, default False
            Show verbose log.
        """

        super().__init__()
        self.root = root
        self.url_base = url_base
        self.verbose = verbose

    def download(
        self,
        *,
        retry: int = 3,
        metadata: bool = True,
        resources: Iterable[RESOURCES] = ("morpho_cng",),
        **kwargs,
    ) -> None:
        """Download data from neuromorpho.org.

        Parameters
        ----------
        retry : int, default 3
            Number of extra attempts made for pages / files that failed.
        metadata : bool, default True
            Download the metadata first. If False, the metadata step is
            skipped.
        resources : iterable of str, default `("morpho_cng",)`
            Names of the resources to download, keys of `DOWNLOAD_CONFIGS`.
        **kwargs :
            Forwarding to `_download_metadata` and `_download_files`.
        """

        # metadata
        path_m = os.path.join(self.root, "metadata")
        if metadata:
            err_pages = None
            for i in range(retry + 1):
                if err_pages is not None and len(err_pages) == 0:
                    break

                self._info("download metadata")
                if i != 0:
                    self._info("retry %d: %s", i, json.dumps(err_pages))

                err_pages = self._download_metadata(path_m, pages=err_pages, **kwargs)

            self._info("download metadata done")
            if err_pages is not None and len(err_pages) != 0:
                self._warning("fails to download metadata: %s", json.dumps(err_pages))
        else:
            self._info("skip download metadata")

        # file
        def dumps(keys: List[bytes]) -> str:
            return json.dumps([i.decode("utf-8") for i in keys])

        # `keys` is handled explicitly so that a retry only asks for the keys
        # that failed, mirroring `pages=err_pages` above. Without this, every
        # retry with `override=True` would download all files again.
        file_kwargs = dict(kwargs)
        requested_keys = file_kwargs.pop("keys", None)

        for name in resources:
            url, map_size = DOWNLOAD_CONFIGS[name]
            path = os.path.join(self.root, name)

            err_keys = None
            for i in range(retry + 1):
                if err_keys is not None and len(err_keys) == 0:
                    break

                self._info("download %s", name)
                if err_keys is not None:
                    self._info("retry %d: %s", i, dumps(err_keys))

                err_keys = self._download_files(
                    url,
                    path,
                    path_m,
                    keys=requested_keys if err_keys is None else err_keys,
                    map_size=map_size,
                    **file_kwargs,
                )

            self._info("download %s done", name)
            if err_keys is not None and len(err_keys) != 0:
                self._warning("fails to download %s: %s", name, dumps(err_keys))

    # pylint: disable-next=too-many-locals
    def convert_lmdb_to_swc(
        self,
        dest: Optional[str] = None,
        *,
        group_by: Optional[str | Callable[[Dict[str, Any]], str | None]] = None,
        where: Optional[Callable[[Dict[str, Any]], bool]] = None,
        encoding: str | None = "utf-8",
    ) -> None:
        r"""Convert lmdb format to SWCs.

        Parameters
        ----------
        dest : str, optional
            If None, use `root/swc`.
        group_by : str | (metadata: Dict[str, Any]) -> str | None, optional
            Group neurons by metadata. If a None is returned then no
            grouping. If a string is entered, use it as a metadata
            attribute name for grouping, e.g.: `archive`, `species`.
        where : (metadata: Dict[str, Any]) -> bool, optional
            Filter neurons by metadata.
        encoding : str | None, default to `utf-8`
            Change swc encoding, part of the original data is not utf-8
            encoded. If is None, keep the original encoding format.

        Notes
        -----
        The following folder layout is assumed.

        ```text
        |- root
        |  |- metadata      # input
        |  |- morpho_cng    # input
        |  |- swc           # output
        |  |  |- groups     # output of groups if grouped
        ```

        See Also
        --------
        neuromorpho_is_valid :
            Recommended filter function, try `where=neuromorpho_is_valid`
        """

        import lmdb

        if where is None:
            where = lambda _: True  # pylint: disable=unnecessary-lambda-assignment

        if isinstance(group_by, str):
            key = group_by
            group_by = lambda v: v[key]  # pylint: disable=unnecessary-lambda-assignment
        elif group_by is None:
            group_by = lambda _: None  # pylint: disable=unnecessary-lambda-assignment

        # Pass 1: select the neurons to export and work out their groups.
        items = []
        env_m = lmdb.Environment(os.path.join(self.root, "metadata"), readonly=True)
        try:
            with env_m.begin() as tx_m:
                for k, v in tx_m.cursor():
                    metadata = json.loads(v)
                    if where(metadata):
                        items.append((k, group_by(metadata)))
        finally:
            # Close the environment even if a callback or the JSON parsing fails.
            env_m.close()

        dest = dest or os.path.join(self.root, "swc")
        os.makedirs(dest, exist_ok=True)
        for grp in {grp for _, grp in items if grp is not None}:
            os.makedirs(os.path.join(dest, grp), exist_ok=True)

        # Pass 2: write one SWC file per selected neuron.
        env_c = lmdb.Environment(os.path.join(self.root, "morpho_cng"), readonly=True)
        try:
            with env_c.begin() as tx_c:
                for k, grp in tqdm(items) if self.verbose else items:
                    kk = k.decode("utf-8")
                    try:
                        bs = tx_c.get(k)
                        if bs is None:
                            self._warning("morpho_cng of '%s' not exists", kk)
                            continue

                        fs = (
                            os.path.join(dest, grp, f"{kk}.swc")
                            if grp is not None
                            else os.path.join(dest, f"{kk}.swc")
                        )

                        if encoding is None:
                            # Keep the stored bytes untouched.
                            with open(fs, "wb") as f:
                                f.write(bs)  # type: ignore
                        else:
                            # Re-encode: read with detected encoding, write
                            # with the requested one.
                            bs = io.BytesIO(bs)  # type: ignore
                            with (
                                open(fs, "w", encoding=encoding) as fw,
                                FileReader(bs, encoding="detect") as fr,
                            ):
                                fw.writelines(fr.readlines())
                    except (IOError, lmdb.Error) as e:
                        self._warning("fails to convert of %s, err: %s", kk, e)
        finally:
            env_c.close()

    # Downloader

    def _download_metadata(
        self,
        path: str,
        *,
        pages: Optional[Iterable[int]] = None,
        page_size: int = API_PAGE_SIZE_MAX,
        **kwargs,
    ) -> List[int]:
        r"""Download all neuron metadata.

        Parameters
        ----------
        path : str
            Path to save data.
        pages : list of int, optional
            If is None, download all pages.
        page_size : int, default `API_PAGE_SIZE_MAX`
            Number of neurons requested per page.
        **kwargs :
            Forwarding to `get`.

        Returns
        -------
        err_pages : list of int
            Failed pages.
        """

        # TODO: how to cache between versions?
        import lmdb

        env = lmdb.Environment(path, map_size=SIZE_METADATA)
        try:
            if pages is None:
                # A one-element request is enough to learn the total count.
                res = self._get_metadata(page=0, page_size=1, **kwargs)
                total = res["page"]["totalElements"]
                pages = range(math.ceil(total / page_size))

            err_pages = []
            for page in tqdm(pages) if self.verbose else pages:
                try:
                    res = self._get_metadata(page, page_size=page_size, **kwargs)
                    with env.begin(write=True) as tx:
                        for neuron in res["_embedded"]["neuronResources"]:
                            k = str(neuron["neuron_id"]).encode("utf-8")
                            v = json.dumps(neuron).encode("utf-8")
                            tx.put(key=k, value=v)
                except IOError as e:
                    err_pages.append(page)
                    self._warning("fails to get metadata of page %s, err: %s", page, e)

            return err_pages
        finally:
            # Close the environment even if an unexpected error escapes.
            env.close()

    # pylint: disable-next=too-many-locals
    def _download_files(
        self,
        url: str,
        path: str,
        path_metadata: str,
        *,
        keys: Optional[Iterable[bytes]] = None,
        override: bool = False,
        map_size: int = 512 * GB,
        **kwargs,
    ) -> List[bytes]:
        """Download files.

        Parameters
        ----------
        url : str
            URL template, see `_get_file`.
        path : str
            Path to save data.
        path_metadata : str
            Path to lmdb of metadata.
        keys : list of bytes, optional
            If exist, ignore `override` option. If None, download all key.
        override : bool, default False
            Override even exists.
        map_size : int, default 512GB
            `map_size` of the lmdb environment opened at `path`.
        **kwargs :
            Forwarding to `get`.

        Returns
        -------
        err_keys : list of bytes
            Failed keys.
        """

        import lmdb

        env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
        try:
            env_c = lmdb.Environment(path, map_size=map_size)
            try:
                if keys is None:
                    with env_m.begin() as tx_m:
                        if override:
                            keys = [k for k, _ in tx_m.cursor()]
                        else:
                            # Only the keys that are not stored yet.
                            with env_c.begin() as tx:
                                keys = [
                                    k for k, _ in tx_m.cursor() if tx.get(k) is None
                                ]

                err_keys = []
                for k in tqdm(keys) if self.verbose else keys:
                    try:
                        with env_m.begin() as tx:
                            metadata = json.loads(tx.get(k).decode("utf-8"))  # type: ignore

                        swc = self._get_file(url, metadata, **kwargs)
                        with env_c.begin(write=True) as tx:
                            tx.put(key=k, value=swc)
                    except IOError as e:
                        err_keys.append(k)
                        self._warning(
                            "fails to get morphology file `%s`, err: %s",
                            k.decode("utf-8"),
                            e,
                        )

                return err_keys
            finally:
                env_c.close()
        finally:
            env_m.close()

    def _get_metadata(
        self, page: int, page_size: int = API_PAGE_SIZE_MAX, **kwargs
    ) -> Dict[str, Any]:
        """Request one page of neuron metadata and return the parsed JSON.

        `**kwargs` are forwarded to `_get`.
        """
        params = {
            "page": page,
            "size": page_size,
            "sort": "neuron_id,neuron_id,asc",
        }
        query = "&".join([f"{k}={v}" for k, v in params.items()])
        url = f"{URL_METADATA}?{query}"
        resp = self._get(url, **kwargs)
        return json.loads(resp)

    def _get_file(self, url: str, metadata: Dict[str, Any], **kwargs) -> bytes:
        """Get file.

        `url` is a template; `$ARCHIVE`, `$NEURON` and `$EXT` are replaced
        with values derived from `metadata`. `**kwargs` are forwarded to
        `_get`.

        Returns
        -------
        bs : bytes
            Bytes of morphology file, encoding is NOT FIXED.
        """

        archive = urllib.parse.quote(metadata["archive"].lower())
        neuron = urllib.parse.quote(metadata["neuron_name"])
        ext = self._guess_ext(metadata)
        url = (
            url.replace("$ARCHIVE", archive)
            .replace("$NEURON", neuron)
            .replace("$EXT", ext)
        )
        return self._get(url, **kwargs)

    def _get(
        self, url: str, *, timeout: int = 2 * 60, proxy: Optional[str] = None
    ) -> bytes:
        """GET `url` and return the response body.

        A URL without an http(s) scheme is joined onto `self.url_base`.
        `proxy`, if given, is used for both http and https. Raises through
        `raise_for_status` on an error status.
        """
        if not url.startswith(("http://", "https://")):
            url = urllib.parse.urljoin(self.url_base, url)

        proxies = None
        if proxy is not None:
            proxies = {"http": proxy, "https": proxy}

        response = self._session().get(url, timeout=timeout, proxies=proxies)
        response.raise_for_status()
        return response.content

    def _session(self) -> Any:
        """Return the `requests` session, creating and caching it on first use."""
        if hasattr(self, "session"):
            return self.session

        import requests
        import requests.adapters
        import urllib3
        import urllib3.util

        class CustomSSLContextHTTPAdapter(requests.adapters.HTTPAdapter):
            """HTTP adapter that hands a custom SSL context to its pool managers."""

            def __init__(self, ssl_context=None, **kwargs):
                self.ssl_context = ssl_context
                super().__init__(**kwargs)

            def init_poolmanager(self, connections, maxsize, block=False):
                super().init_poolmanager(
                    connections, maxsize, block, ssl_context=self.ssl_context
                )

            def proxy_manager_for(self, proxy, **proxy_kwargs):
                return super().proxy_manager_for(
                    proxy, **proxy_kwargs, ssl_context=self.ssl_context
                )

        ctx = urllib3.util.create_urllib3_context()
        ctx.load_default_certs()
        # NOTE(review): SECLEVEL=1 lowers the OpenSSL security level for every
        # https request of this session -- presumably required by the server's
        # TLS configuration; confirm it is still needed.
        ctx.set_ciphers("DEFAULT@SECLEVEL=1")

        session = requests.session()
        session.adapters.pop("https://", None)
        session.mount("https://", CustomSSLContextHTTPAdapter(ssl_context=ctx))

        self.session = session
        return session

    # format
    def _guess_ext(self, metadata) -> str:
        """Return the file extension (without dot) for `original_format`.

        `Custom.xml` maps to `morph.xml`; anything else yields the text
        after the last dot of `metadata["original_format"]`.
        """
        original_format = metadata["original_format"]
        if original_format == "Custom.xml":
            return "morph.xml"

        _, ext = os.path.splitext(original_format)
        return ext[1:]

    # log helper

    def _info(self, msg: str, *arg):
        """Log `msg` at INFO level and echo it to stdout when verbose."""
        logging.info(msg, *arg, stacklevel=2)
        if self.verbose:
            # Messages are printf-style, the same as for `logging`; calling
            # `str.format` on them would print the raw placeholders.
            print(msg % arg if arg else msg)

    def _warning(self, msg: str, *arg):
        """Log `msg` at WARNING level and echo it to stdout when verbose."""
        logging.warning(msg, *arg, stacklevel=2)
        if self.verbose:
            # Messages are printf-style, the same as for `logging`; calling
            # `str.format` on them would print the raw placeholders.
            print(msg % arg if arg else msg)
|
|
445
562
|
|
|
446
563
|
|
|
447
564
|
if __name__ == "__main__":
|
|
@@ -451,6 +568,14 @@ if __name__ == "__main__":
|
|
|
451
568
|
sub = subparsers.add_parser("download")
|
|
452
569
|
sub.add_argument("-o", "--path", type=str)
|
|
453
570
|
sub.add_argument("--retry", type=int, default=3)
|
|
571
|
+
sub.add_argument("--metadata", type=bool, default=True)
|
|
572
|
+
sub.add_argument(
|
|
573
|
+
"--resources",
|
|
574
|
+
type=str,
|
|
575
|
+
nargs="*",
|
|
576
|
+
default=["morpho_cng"],
|
|
577
|
+
choices=["morpho_cng", "morpho_source", "log_cng", "log_source"],
|
|
578
|
+
)
|
|
454
579
|
sub.add_argument("--proxy", type=str, default=None)
|
|
455
580
|
sub.add_argument("--verbose", type=bool, default=True)
|
|
456
581
|
sub.set_defaults(func=download_neuromorpho)
|