swcgeom 0.15.0__py3-none-any.whl → 0.18.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of swcgeom might be problematic. Click here for more details.
- swcgeom/__init__.py +26 -1
- swcgeom/analysis/__init__.py +21 -8
- swcgeom/analysis/feature_extractor.py +43 -18
- swcgeom/analysis/features.py +250 -0
- swcgeom/analysis/lmeasure.py +857 -0
- swcgeom/analysis/sholl.py +55 -29
- swcgeom/analysis/trunk.py +27 -11
- swcgeom/analysis/visualization.py +24 -9
- swcgeom/analysis/visualization3d.py +100 -0
- swcgeom/analysis/volume.py +19 -4
- swcgeom/core/__init__.py +32 -9
- swcgeom/core/branch.py +28 -7
- swcgeom/core/branch_tree.py +18 -4
- swcgeom/core/{segment.py → compartment.py} +31 -10
- swcgeom/core/node.py +31 -10
- swcgeom/core/path.py +37 -10
- swcgeom/core/population.py +103 -34
- swcgeom/core/swc.py +26 -10
- swcgeom/core/swc_utils/__init__.py +21 -7
- swcgeom/core/swc_utils/assembler.py +27 -1
- swcgeom/core/swc_utils/base.py +25 -12
- swcgeom/core/swc_utils/checker.py +31 -14
- swcgeom/core/swc_utils/io.py +24 -7
- swcgeom/core/swc_utils/normalizer.py +20 -4
- swcgeom/core/swc_utils/subtree.py +17 -2
- swcgeom/core/tree.py +85 -72
- swcgeom/core/tree_utils.py +31 -16
- swcgeom/core/tree_utils_impl.py +18 -3
- swcgeom/images/__init__.py +17 -2
- swcgeom/images/augmentation.py +24 -4
- swcgeom/images/contrast.py +122 -0
- swcgeom/images/folder.py +97 -39
- swcgeom/images/io.py +108 -121
- swcgeom/transforms/__init__.py +28 -10
- swcgeom/transforms/base.py +17 -2
- swcgeom/transforms/branch.py +74 -8
- swcgeom/transforms/branch_tree.py +82 -0
- swcgeom/transforms/geometry.py +22 -7
- swcgeom/transforms/image_preprocess.py +115 -0
- swcgeom/transforms/image_stack.py +37 -13
- swcgeom/transforms/images.py +184 -7
- swcgeom/transforms/mst.py +20 -5
- swcgeom/transforms/neurolucida_asc.py +508 -0
- swcgeom/transforms/path.py +15 -0
- swcgeom/transforms/population.py +16 -3
- swcgeom/transforms/tree.py +89 -31
- swcgeom/transforms/tree_assembler.py +23 -7
- swcgeom/utils/__init__.py +27 -11
- swcgeom/utils/debug.py +15 -0
- swcgeom/utils/download.py +59 -21
- swcgeom/utils/dsu.py +15 -0
- swcgeom/utils/ellipse.py +18 -4
- swcgeom/utils/file.py +15 -0
- swcgeom/utils/neuromorpho.py +439 -302
- swcgeom/utils/numpy_helper.py +29 -4
- swcgeom/utils/plotter_2d.py +151 -0
- swcgeom/utils/plotter_3d.py +48 -0
- swcgeom/utils/renderer.py +49 -145
- swcgeom/utils/sdf.py +24 -8
- swcgeom/utils/solid_geometry.py +16 -3
- swcgeom/utils/transforms.py +17 -4
- swcgeom/utils/volumetric_object.py +23 -10
- {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/LICENSE +1 -1
- {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/METADATA +28 -24
- swcgeom-0.18.3.dist-info/RECORD +67 -0
- {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/WHEEL +1 -1
- swcgeom/_version.py +0 -16
- swcgeom/analysis/branch_features.py +0 -67
- swcgeom/analysis/node_features.py +0 -121
- swcgeom/analysis/path_features.py +0 -37
- swcgeom-0.15.0.dist-info/RECORD +0 -62
- {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/top_level.txt +0 -0
swcgeom/utils/neuromorpho.py
CHANGED
|
@@ -1,9 +1,24 @@
|
|
|
1
|
+
# Copyright 2022-2025 Zexin Yuan
|
|
2
|
+
#
|
|
3
|
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
|
4
|
+
# you may not use this file except in compliance with the License.
|
|
5
|
+
# You may obtain a copy of the License at
|
|
6
|
+
#
|
|
7
|
+
# http://www.apache.org/licenses/LICENSE-2.0
|
|
8
|
+
#
|
|
9
|
+
# Unless required by applicable law or agreed to in writing, software
|
|
10
|
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
|
11
|
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
12
|
+
# See the License for the specific language governing permissions and
|
|
13
|
+
# limitations under the License.
|
|
14
|
+
|
|
15
|
+
|
|
1
16
|
"""NeuroMorpho.org.
|
|
2
17
|
|
|
3
18
|
Examples
|
|
4
19
|
--------
|
|
5
20
|
|
|
6
|
-
Metadata:
|
|
21
|
+
Metadata:
|
|
7
22
|
|
|
8
23
|
```json
|
|
9
24
|
{
|
|
@@ -81,7 +96,10 @@ import logging
|
|
|
81
96
|
import math
|
|
82
97
|
import os
|
|
83
98
|
import urllib.parse
|
|
84
|
-
from
|
|
99
|
+
from collections.abc import Callable, Iterable
|
|
100
|
+
from typing import Any, Literal, Optional
|
|
101
|
+
|
|
102
|
+
from tqdm import tqdm
|
|
85
103
|
|
|
86
104
|
from swcgeom.utils import FileReader
|
|
87
105
|
|
|
@@ -89,24 +107,39 @@ __all__ = [
|
|
|
89
107
|
"neuromorpho_is_valid",
|
|
90
108
|
"neuromorpho_convert_lmdb_to_swc",
|
|
91
109
|
"download_neuromorpho",
|
|
110
|
+
"NeuroMorpho",
|
|
92
111
|
]
|
|
93
112
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
113
|
+
|
|
114
|
+
URL_BASE = "https://neuromorpho.org"
|
|
115
|
+
URL_METADATA = "api/neuron"
|
|
116
|
+
URL_MORPHO_CNG = "dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
|
|
117
|
+
URL_MORPHO_SOURCE = "dableFiles/$ARCHIVE/Source-Version/$NEURON.$EXT"
|
|
118
|
+
URL_LOG_CNG = "dableFiles/$ARCHIVE/Remaining%20issues/$NEURON.CNG.swc.std"
|
|
119
|
+
URL_LOG_SOURCE = "dableFiles/$ARCHIVE/Standardization%20log/$NEURON.std"
|
|
120
|
+
API_PAGE_SIZE_MAX = 500
|
|
99
121
|
|
|
100
122
|
KB = 1024
|
|
101
123
|
MB = 1024 * KB
|
|
102
124
|
GB = 1024 * MB
|
|
103
125
|
|
|
104
126
|
# Test version: 8.5.25 (2023-08-01)
|
|
105
|
-
# About 1.1 GB and 18 GB
|
|
106
127
|
# No ETAs for future version
|
|
128
|
+
# Size of metadata about 0.5 GB
|
|
129
|
+
# Size of morpho_cng about 18 GB
|
|
130
|
+
# Not sure about the size of others
|
|
107
131
|
SIZE_METADATA = 2 * GB
|
|
108
132
|
SIZE_DATA = 20 * GB
|
|
109
133
|
|
|
134
|
+
RESOURCES = Literal["morpho_cng", "morpho_source", "log_cng", "log_source"]
|
|
135
|
+
DOWNLOAD_CONFIGS: dict[RESOURCES, tuple[str, int]] = {
    # Maps a resource name (also used as the lmdb sub-directory under the
    # download root) to its settings.
    # name/path: (url, size)
    "morpho_cng": (URL_MORPHO_CNG, 20 * GB),
    # Source-version files use their own URL template (the only one that
    # carries the `$EXT` placeholder), not the CNG log template.
    "morpho_source": (URL_MORPHO_SOURCE, 512 * GB),
    "log_cng": (URL_LOG_CNG, 512 * GB),
    "log_source": (URL_LOG_SOURCE, 512 * GB),
}
|
|
142
|
+
|
|
110
143
|
# fmt:off
|
|
111
144
|
# Test version: 8.5.25 (2023-08-01)
|
|
112
145
|
# No ETAs for future version
|
|
@@ -128,320 +161,416 @@ invalid_ids = [
|
|
|
128
161
|
# fmt: on
|
|
129
162
|
|
|
130
163
|
|
|
131
|
-
def neuromorpho_is_valid(metadata:
|
|
164
|
+
def neuromorpho_is_valid(metadata: dict[str, Any]) -> bool:
    """Tell whether a neuron is absent from the `invalid_ids` list.

    Parameters
    ----------
    metadata : dict[str, Any]
        Neuron metadata; only its `neuron_id` entry is read.

    Returns
    -------
    valid : bool
        False if `metadata["neuron_id"]` appears in `invalid_ids`.
    """
    neuron_id = metadata["neuron_id"]
    is_invalid = neuron_id in invalid_ids
    return not is_invalid
|
|
133
166
|
|
|
134
167
|
|
|
135
|
-
# pylint: disable-next=too-many-locals
|
|
136
168
|
def neuromorpho_convert_lmdb_to_swc(
|
|
137
|
-
root: str,
|
|
138
|
-
dest: Optional[str] = None,
|
|
139
|
-
*,
|
|
140
|
-
group_by: Optional[str | Callable[[Dict[str, Any]], str | None]] = None,
|
|
141
|
-
where: Optional[Callable[[Dict[str, Any]], bool]] = None,
|
|
142
|
-
encoding: str | None = "utf-8",
|
|
143
|
-
verbose: bool = False,
|
|
169
|
+
root: str, dest: Optional[str] = None, *, verbose: bool = False, **kwargs
|
|
144
170
|
) -> None:
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
else os.path.join(dest, f"{kk}.swc")
|
|
171
|
+
nmo = NeuroMorpho(root, verbose=verbose)
|
|
172
|
+
nmo.convert_lmdb_to_swc(dest, **kwargs)
|
|
173
|
+
|
|
174
|
+
|
|
175
|
+
def download_neuromorpho(path: str, *, verbose: bool = False, **kwargs) -> None:
    """Download NeuroMorpho.org data into `path`.

    Functional wrapper: creates a `NeuroMorpho` rooted at `path` and
    forwards `**kwargs` unchanged to `NeuroMorpho.download`.

    Parameters
    ----------
    path : str
        Root directory handed to `NeuroMorpho`.
    verbose : bool, default False
        Passed to `NeuroMorpho` as its `verbose` flag.
    **kwargs :
        Forwarding to `NeuroMorpho.download`.
    """
    NeuroMorpho(path, verbose=verbose).download(**kwargs)
|
|
178
|
+
|
|
179
|
+
|
|
180
|
+
class NeuroMorpho:
|
|
181
|
+
def __init__(
    self, root: str, *, url_base: str = URL_BASE, verbose: bool = False
) -> None:
    """Bind the instance to a local data directory and a remote site.

    Parameters
    ----------
    root : str
        Local root directory; the other methods join sub-paths such as
        `metadata`, `morpho_cng` and `swc` onto it.
    url_base : str, default `URL_BASE`
        Base URL that relative request URLs are joined against.
    verbose : bool, default False
        Show verbose log.
    """

    super().__init__()
    self.root, self.url_base, self.verbose = root, url_base, verbose
|
|
196
|
+
|
|
197
|
+
def download(
|
|
198
|
+
self,
|
|
199
|
+
*,
|
|
200
|
+
retry: int = 3,
|
|
201
|
+
metadata: bool = True,
|
|
202
|
+
resources: Iterable[RESOURCES] = ["morpho_cng"],
|
|
203
|
+
**kwargs,
|
|
204
|
+
) -> None:
|
|
205
|
+
"""Download data from neuromorpho.org."""
|
|
206
|
+
|
|
207
|
+
# metadata
|
|
208
|
+
path_m = os.path.join(self.root, "metadata")
|
|
209
|
+
if metadata:
|
|
210
|
+
err_pages = None
|
|
211
|
+
for i in range(retry + 1):
|
|
212
|
+
if err_pages is not None and len(err_pages) == 0:
|
|
213
|
+
break
|
|
214
|
+
|
|
215
|
+
self._info("download metadata")
|
|
216
|
+
if i != 0:
|
|
217
|
+
self._info("retry %d: %s", i, json.dumps(err_pages))
|
|
218
|
+
|
|
219
|
+
err_pages = self._download_metadata(path_m, pages=err_pages, **kwargs)
|
|
220
|
+
|
|
221
|
+
self._info("download metadata done")
|
|
222
|
+
if err_pages is not None and len(err_pages) != 0:
|
|
223
|
+
self._warning("fails to download metadata: %s", json.dumps(err_pages))
|
|
224
|
+
else:
|
|
225
|
+
self._info("skip download metadata")
|
|
226
|
+
|
|
227
|
+
# file
|
|
228
|
+
def dumps(keys: list[bytes]) -> str:
|
|
229
|
+
return json.dumps([i.decode("utf-8") for i in keys])
|
|
230
|
+
|
|
231
|
+
for name in resources:
|
|
232
|
+
url, map_size = DOWNLOAD_CONFIGS[name]
|
|
233
|
+
path = os.path.join(self.root, name)
|
|
234
|
+
|
|
235
|
+
err_keys = None
|
|
236
|
+
for i in range(retry + 1):
|
|
237
|
+
if err_keys is not None and len(err_keys) == 0:
|
|
238
|
+
break
|
|
239
|
+
|
|
240
|
+
self._info("download %s", name)
|
|
241
|
+
if err_keys is not None:
|
|
242
|
+
self._info("retry %d: %s", i, dumps(err_keys))
|
|
243
|
+
|
|
244
|
+
err_keys = self._download_files(
|
|
245
|
+
url, path, path_m, map_size=map_size, **kwargs
|
|
221
246
|
)
|
|
222
247
|
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
|
|
248
|
+
self._info("download %s done", name)
|
|
249
|
+
if err_keys is not None and len(err_keys) != 0:
|
|
250
|
+
self._warning("fails to download %s: %s", name, dumps(err_keys))
|
|
251
|
+
|
|
252
|
+
# pylint: disable-next=too-many-locals
|
|
253
|
+
def convert_lmdb_to_swc(
    self,
    dest: Optional[str] = None,
    *,
    group_by: Optional[str | Callable[[dict[str, Any]], str | None]] = None,
    where: Optional[Callable[[dict[str, Any]], bool]] = None,
    encoding: str | None = "utf-8",
) -> None:
    r"""Convert lmdb format to SWCs.

    Parameters
    ----------
    dest : str, optional
        Output directory. If None, use `<root>/swc`.
    group_by : str | (metadata: dict[str, Any]) -> str | None, optional
        Group neurons by metadata. If a None is returned then no
        grouping. If a string is entered, use it as a metadata
        attribute name for grouping, e.g.: `archive`, `species`.
    where : (metadata: dict[str, Any]) -> bool, optional
        Filter neurons by metadata.
    encoding : str | None, default to `utf-8`
        Change swc encoding, part of the original data is not utf-8
        encoded. If is None, keep the original encoding format.

    Notes
    -----
    We are asserting the following folder.

    ```text
    |- root
    |  |- metadata      # input
    |  |- morpho_cng    # input
    |  |- swc           # output
    |  |  |- groups     # output of groups if grouped
    ```

    The progress bar and the console echo of warnings follow
    `self.verbose`. Per-neuron I/O and lmdb errors are reported as
    warnings and do not abort the conversion.

    See Also
    --------
    neuromorpho_is_valid :
        Recommended filter function, try `where=neuromorpho_is_valid`
    """

    import lmdb

    where = where or (lambda _: True)
    if isinstance(group_by, str):
        key = group_by
        group_by = lambda v: v[key]  # pylint: disable=unnecessary-lambda-assignment
    elif group_by is None:
        group_by = lambda _: None  # pylint: disable=unnecessary-lambda-assignment

    # Pass 1: collect (key, group) for every neuron accepted by `where`.
    items = []
    env_m = lmdb.Environment(os.path.join(self.root, "metadata"), readonly=True)
    try:
        with env_m.begin() as tx_m:
            for k, v in tx_m.cursor():
                metadata = json.loads(v)
                if where(metadata):
                    items.append((k, group_by(metadata)))
    finally:
        # Close even when a user callback or JSON decoding raises.
        env_m.close()

    dest = dest or os.path.join(self.root, "swc")
    os.makedirs(dest, exist_ok=True)
    for grp in {grp for _, grp in items if grp is not None}:
        os.makedirs(os.path.join(dest, grp), exist_ok=True)

    # Pass 2: write one SWC file per selected neuron.
    env_c = lmdb.Environment(os.path.join(self.root, "morpho_cng"), readonly=True)
    try:
        with env_c.begin() as tx_c:
            for k, grp in tqdm(items) if self.verbose else items:
                kk = k.decode("utf-8")
                try:
                    bs = tx_c.get(k)
                    if bs is None:
                        self._warning("morpho_cng of '%s' not exists", kk)
                        continue

                    fs = (
                        os.path.join(dest, grp, f"{kk}.swc")
                        if grp is not None
                        else os.path.join(dest, f"{kk}.swc")
                    )

                    if encoding is None:
                        # Keep the stored bytes untouched.
                        with open(fs, "wb") as f:
                            f.write(bs)  # type: ignore
                    else:
                        # Re-encode: read with detected encoding, write
                        # with the requested one.
                        bs = io.BytesIO(bs)  # type: ignore
                        with (
                            open(fs, "w", encoding=encoding) as fw,
                            FileReader(bs, encoding="detect") as fr,
                        ):
                            fw.writelines(fr.readlines())
                except (IOError, lmdb.Error) as e:
                    self._warning("fails to convert of %s, err: %s", kk, e)
    finally:
        env_c.close()
|
|
351
|
+
|
|
352
|
+
# Downloader
|
|
353
|
+
|
|
354
|
+
def _download_metadata(
    self,
    path: str,
    *,
    pages: Optional[Iterable[int]] = None,
    page_size: int = API_PAGE_SIZE_MAX,
    **kwargs,
) -> list[int]:
    r"""Download all neuron metadata.

    Parameters
    ----------
    path : str
        Path to save data.
    pages : List of int, optional
        If is None, download all pages.
    page_size : int, default `API_PAGE_SIZE_MAX`
        Number of neurons requested per page.
    **kwargs :
        Forwarding to `get`.

    Returns
    -------
    err_pages : List of int
        Failed pages.

    Notes
    -----
    The progress bar follows `self.verbose`. Only `IOError` raised while
    fetching or storing a page marks that page as failed; other errors
    propagate to the caller.
    """

    # TODO: how to cache between versions?
    import lmdb

    env = lmdb.Environment(path, map_size=SIZE_METADATA)
    try:
        if pages is None:
            # Ask for a single element just to learn the total count.
            res = self._get_metadata(page=0, page_size=1, **kwargs)
            total = res["page"]["totalElements"]
            pages = range(math.ceil(total / page_size))

        err_pages = []
        for page in tqdm(pages) if self.verbose else pages:
            try:
                res = self._get_metadata(page, page_size=page_size, **kwargs)
                # One write transaction per page.
                with env.begin(write=True) as tx:
                    for neuron in res["_embedded"]["neuronResources"]:
                        k = str(neuron["neuron_id"]).encode("utf-8")
                        v = json.dumps(neuron).encode("utf-8")
                        tx.put(key=k, value=v)
            except IOError as e:
                err_pages.append(page)
                self._warning("fails to get metadata of page %s, err: %s", page, e)
    finally:
        # Close the environment even when an error propagates.
        env.close()

    return err_pages
|
|
405
|
+
|
|
406
|
+
# pylint: disable-next=too-many-locals
|
|
407
|
+
def _download_files(
|
|
408
|
+
self,
|
|
409
|
+
url: str,
|
|
410
|
+
path: str,
|
|
411
|
+
path_metadata: str,
|
|
412
|
+
*,
|
|
413
|
+
keys: Optional[Iterable[bytes]] = None,
|
|
414
|
+
override: bool = False,
|
|
415
|
+
map_size: int = 512 * GB,
|
|
416
|
+
**kwargs,
|
|
417
|
+
) -> list[bytes]:
|
|
418
|
+
"""Download files.
|
|
419
|
+
|
|
420
|
+
Parameters
|
|
421
|
+
----------
|
|
422
|
+
url : str
|
|
423
|
+
path : str
|
|
424
|
+
Path to save data.
|
|
425
|
+
path_metadata : str
|
|
426
|
+
Path to lmdb of metadata.
|
|
427
|
+
keys : List of bytes, optional
|
|
428
|
+
If exist, ignore `override` option. If None, download all key.
|
|
429
|
+
override : bool, default False
|
|
430
|
+
Override even exists.
|
|
431
|
+
map_size : int, default 512GB
|
|
432
|
+
**kwargs :
|
|
433
|
+
Forwarding to `get`.
|
|
434
|
+
|
|
435
|
+
Returns
|
|
436
|
+
-------
|
|
437
|
+
err_keys : List of str
|
|
438
|
+
Failed keys.
|
|
439
|
+
"""
|
|
440
|
+
|
|
441
|
+
import lmdb
|
|
442
|
+
|
|
443
|
+
env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
|
|
444
|
+
env_c = lmdb.Environment(path, map_size=map_size)
|
|
445
|
+
if keys is None:
|
|
446
|
+
with env_m.begin() as tx_m:
|
|
447
|
+
if override:
|
|
448
|
+
keys = [k for k, v in tx_m.cursor()]
|
|
226
449
|
else:
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
open(fs, "w", encoding=encoding) as fw,
|
|
230
|
-
FileReader(bs, encoding="detect") as fr,
|
|
231
|
-
):
|
|
232
|
-
fw.writelines(fr.readlines())
|
|
233
|
-
except Exception as e: # pylint: disable=broad-exception-caught
|
|
234
|
-
logging.warning("fails to convert of %s, err: %s", kk, e)
|
|
235
|
-
|
|
236
|
-
env_c.close()
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
def download_neuromorpho(
|
|
240
|
-
path: str, *, retry: int = 3, verbose: bool = False, **kwargs
|
|
241
|
-
) -> None:
|
|
242
|
-
kwargs.setdefault("verbose", verbose)
|
|
450
|
+
with env_c.begin() as tx:
|
|
451
|
+
keys = [k for k, v in tx_m.cursor() if tx.get(k) is None]
|
|
243
452
|
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
249
|
-
|
|
250
|
-
|
|
251
|
-
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
"download metadata pages failed after %d retry: %s",
|
|
259
|
-
retry,
|
|
260
|
-
json.dumps(err_pages),
|
|
261
|
-
)
|
|
453
|
+
err_keys = []
|
|
454
|
+
for k in tqdm(keys) if self.verbose else keys:
|
|
455
|
+
try:
|
|
456
|
+
with env_m.begin() as tx:
|
|
457
|
+
metadata = json.loads(tx.get(k).decode("utf-8")) # type: ignore
|
|
458
|
+
|
|
459
|
+
swc = self._get_file(url, metadata, **kwargs)
|
|
460
|
+
with env_c.begin(write=True) as tx:
|
|
461
|
+
tx.put(key=k, value=swc)
|
|
462
|
+
except IOError as e:
|
|
463
|
+
err_keys.append(k)
|
|
464
|
+
self._warning(
|
|
465
|
+
"fails to get morphology file `%s`, err: %s", k.decode("utf-8"), e
|
|
466
|
+
)
|
|
262
467
|
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
|
|
277
|
-
|
|
468
|
+
env_m.close()
|
|
469
|
+
env_c.close()
|
|
470
|
+
return err_keys
|
|
471
|
+
|
|
472
|
+
def _get_metadata(
    self, page: int, page_size: int = API_PAGE_SIZE_MAX, **kwargs
) -> dict[str, Any]:
    """Fetch one page of neuron metadata and decode it from JSON.

    Parameters
    ----------
    page : int
        Page index sent as the `page` query parameter.
    page_size : int, default `API_PAGE_SIZE_MAX`
        Sent as the `size` query parameter.
    **kwargs :
        Forwarding to `_get`.

    Returns
    -------
    res : dict[str, Any]
        Decoded JSON response.
    """
    params = {
        "page": page,
        "size": page_size,
        "sort": "neuron_id,neuron_id,asc",
    }
    # Values are joined verbatim (no percent-encoding).
    query = "&".join(f"{name}={value}" for name, value in params.items())
    payload = self._get(f"{URL_METADATA}?{query}", **kwargs)
    return json.loads(payload)
|
|
484
|
+
|
|
485
|
+
def _get_file(self, url: str, metadata: dict[str, Any], **kwargs) -> bytes:
    """Get file.

    Fills the `$ARCHIVE`, `$NEURON` and `$EXT` placeholders of `url`
    from `metadata` and fetches the result.

    Parameters
    ----------
    url : str
        URL template containing the placeholders.
    metadata : dict[str, Any]
        Neuron metadata; `archive` (lower-cased) and `neuron_name` are
        URL-quoted, the extension comes from `_guess_ext`.
    **kwargs :
        Forwarding to `_get`.

    Returns
    -------
    bs : bytes
        Bytes of morphology file, encoding is NOT FIXED.
    """

    substitutions = {
        "$ARCHIVE": urllib.parse.quote(metadata["archive"].lower()),
        "$NEURON": urllib.parse.quote(metadata["neuron_name"]),
        "$EXT": self._guess_ext(metadata),
    }
    # Applied in insertion order: archive, neuron, extension.
    for placeholder, value in substitutions.items():
        url = url.replace(placeholder, value)
    return self._get(url, **kwargs)
|
|
503
|
+
|
|
504
|
+
def _get(
    self, url: str, *, timeout: int = 2 * 60, proxy: Optional[str] = None
) -> bytes:
    """Issue a GET request through the shared session.

    Parameters
    ----------
    url : str
        Absolute `http(s)://` URL, or a relative one that is joined
        onto `self.url_base`.
    timeout : int, default 120
        Passed to the session's `get` as `timeout`.
    proxy : str, optional
        If given, used for both the `http` and `https` proxies.

    Returns
    -------
    content : bytes
        Response body; `raise_for_status` is called before returning.
    """
    if not url.startswith(("http://", "https://")):
        url = urllib.parse.urljoin(self.url_base, url)

    proxies = {"http": proxy, "https": proxy} if proxy is not None else None

    response = self._session().get(url, timeout=timeout, proxies=proxies)
    response.raise_for_status()
    return response.content
|
|
517
|
+
|
|
518
|
+
def _session(self) -> Any:
|
|
519
|
+
if hasattr(self, "session"):
|
|
520
|
+
return self.session
|
|
521
|
+
|
|
522
|
+
import requests
|
|
523
|
+
import requests.adapters
|
|
524
|
+
import urllib3
|
|
525
|
+
import urllib3.util
|
|
526
|
+
|
|
527
|
+
class CustomSSLContextHTTPAdapter(requests.adapters.HTTPAdapter):
|
|
528
|
+
def __init__(self, ssl_context=None, **kwargs):
|
|
529
|
+
self.ssl_context = ssl_context
|
|
530
|
+
super().__init__(**kwargs)
|
|
531
|
+
|
|
532
|
+
def init_poolmanager(self, connections, maxsize, block=False):
|
|
533
|
+
super().init_poolmanager(
|
|
534
|
+
connections, maxsize, block, ssl_context=self.ssl_context
|
|
535
|
+
)
|
|
279
536
|
|
|
537
|
+
def proxy_manager_for(self, proxy, **proxy_kwargs):
|
|
538
|
+
return super().proxy_manager_for(
|
|
539
|
+
proxy, **proxy_kwargs, ssl_context=self.ssl_context
|
|
540
|
+
)
|
|
280
541
|
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
)
|
|
284
|
-
"""Download all neuron metadata.
|
|
285
|
-
|
|
286
|
-
Parameters
|
|
287
|
-
----------
|
|
288
|
-
path : str
|
|
289
|
-
Path to save data.
|
|
290
|
-
pages : list of int, optional
|
|
291
|
-
If is None, download all pages.
|
|
292
|
-
verbose : bool, default False
|
|
293
|
-
Show verbose log.
|
|
294
|
-
**kwargs :
|
|
295
|
-
Forwarding to `get`.
|
|
296
|
-
|
|
297
|
-
Returns
|
|
298
|
-
-------
|
|
299
|
-
err_pages : list of int
|
|
300
|
-
Failed pages.
|
|
301
|
-
"""
|
|
302
|
-
# TODO: how to cache between versions?
|
|
303
|
-
import lmdb
|
|
304
|
-
from tqdm import tqdm
|
|
305
|
-
|
|
306
|
-
env = lmdb.Environment(path, map_size=SIZE_METADATA)
|
|
307
|
-
page_size = API_NEURON_MAX_SIZE
|
|
308
|
-
if pages is None:
|
|
309
|
-
res = get_metadata(page=0, page_size=1, **kwargs)
|
|
310
|
-
total = res["page"]["totalElements"]
|
|
311
|
-
pages = range(math.ceil(total / page_size))
|
|
312
|
-
|
|
313
|
-
err_pages = []
|
|
314
|
-
for page in tqdm(pages) if verbose else pages:
|
|
315
|
-
try:
|
|
316
|
-
res = get_metadata(page, page_size=page_size, **kwargs)
|
|
317
|
-
with env.begin(write=True) as tx:
|
|
318
|
-
for neuron in res["_embedded"]["neuronResources"]:
|
|
319
|
-
k = str(neuron["neuron_id"]).encode("utf-8")
|
|
320
|
-
v = json.dumps(neuron).encode("utf-8")
|
|
321
|
-
tx.put(key=k, value=v)
|
|
322
|
-
except Exception as e: # pylint: disable=broad-exception-caught
|
|
323
|
-
err_pages.append(page)
|
|
324
|
-
logging.warning("fails to get metadata of page %s, err: %s", page, e)
|
|
325
|
-
|
|
326
|
-
env.close()
|
|
327
|
-
return err_pages
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
# pylint: disable-next=too-many-locals
|
|
331
|
-
def download_cng_version(
|
|
332
|
-
path: str,
|
|
333
|
-
path_metadata: str,
|
|
334
|
-
*,
|
|
335
|
-
keys: Optional[Iterable[bytes]] = None,
|
|
336
|
-
override: bool = False,
|
|
337
|
-
verbose: bool = False,
|
|
338
|
-
**kwargs,
|
|
339
|
-
) -> List[bytes]:
|
|
340
|
-
"""Download GNG version swc.
|
|
341
|
-
|
|
342
|
-
Parameters
|
|
343
|
-
----------
|
|
344
|
-
path : str
|
|
345
|
-
Path to save data.
|
|
346
|
-
path_metadata : str
|
|
347
|
-
Path to lmdb of metadata.
|
|
348
|
-
keys : list of bytes, optional
|
|
349
|
-
If exist, ignore `override` option. If None, download all key.
|
|
350
|
-
override : bool, default False
|
|
351
|
-
Override even exists.
|
|
352
|
-
verbose : bool, default False
|
|
353
|
-
Show verbose log.
|
|
354
|
-
**kwargs :
|
|
355
|
-
Forwarding to `get`.
|
|
356
|
-
|
|
357
|
-
Returns
|
|
358
|
-
-------
|
|
359
|
-
err_keys : list of str
|
|
360
|
-
Failed keys.
|
|
361
|
-
"""
|
|
362
|
-
import lmdb
|
|
363
|
-
from tqdm import tqdm
|
|
364
|
-
|
|
365
|
-
env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
|
|
366
|
-
env_c = lmdb.Environment(path, map_size=SIZE_DATA)
|
|
367
|
-
if keys is None:
|
|
368
|
-
with env_m.begin() as tx_m:
|
|
369
|
-
if override:
|
|
370
|
-
keys = [k for k, v in tx_m.cursor()]
|
|
371
|
-
else:
|
|
372
|
-
with env_c.begin() as tx:
|
|
373
|
-
keys = [k for k, v in tx_m.cursor() if tx.get(k) is None]
|
|
374
|
-
|
|
375
|
-
err_keys = []
|
|
376
|
-
for k in tqdm(keys) if verbose else keys:
|
|
377
|
-
try:
|
|
378
|
-
with env_m.begin() as tx:
|
|
379
|
-
metadata = json.loads(tx.get(k).decode("utf-8")) # type: ignore
|
|
380
|
-
|
|
381
|
-
swc = get_cng_version(metadata, **kwargs)
|
|
382
|
-
with env_c.begin(write=True) as tx:
|
|
383
|
-
tx.put(key=k, value=swc)
|
|
384
|
-
except Exception as e: # pylint: disable=broad-exception-caught
|
|
385
|
-
err_keys.append(k)
|
|
386
|
-
logging.warning(
|
|
387
|
-
"fails to get cng version of '%s', err: %s", k.decode("utf-8"), e
|
|
388
|
-
)
|
|
389
|
-
|
|
390
|
-
env_m.close()
|
|
391
|
-
env_c.close()
|
|
392
|
-
return err_keys
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
def get_metadata(
|
|
396
|
-
page, page_size: int = API_NEURON_MAX_SIZE, **kwargs
|
|
397
|
-
) -> Dict[str, Any]:
|
|
398
|
-
params = {
|
|
399
|
-
"page": page,
|
|
400
|
-
"size": page_size,
|
|
401
|
-
"sort": "neuron_id,neuron_id,asc",
|
|
402
|
-
}
|
|
403
|
-
query = "&".join([f"{k}={v}" for k, v in params.items()])
|
|
404
|
-
url = f"{URL_NEURON}?{query}"
|
|
405
|
-
|
|
406
|
-
s = get(url, **kwargs)
|
|
407
|
-
return json.loads(s)
|
|
408
|
-
|
|
542
|
+
ctx = urllib3.util.create_urllib3_context()
|
|
543
|
+
ctx.load_default_certs()
|
|
544
|
+
ctx.set_ciphers("DEFAULT@SECLEVEL=1")
|
|
409
545
|
|
|
410
|
-
|
|
411
|
-
|
|
546
|
+
session = requests.session()
|
|
547
|
+
session.adapters.pop("https://", None)
|
|
548
|
+
session.mount("https://", CustomSSLContextHTTPAdapter(ssl_context=ctx))
|
|
412
549
|
|
|
413
|
-
|
|
414
|
-
|
|
415
|
-
bs : bytes
|
|
416
|
-
SWC bytes, encoding is NOT FIXED.
|
|
417
|
-
"""
|
|
418
|
-
archive = urllib.parse.quote(metadata["archive"].lower())
|
|
419
|
-
neuron = urllib.parse.quote(metadata["neuron_name"])
|
|
420
|
-
url = URL_CNG_VERSION.replace("$ARCHIVE", archive).replace("$NEURON", neuron)
|
|
421
|
-
return get(url, **kwargs)
|
|
550
|
+
self.session = session
|
|
551
|
+
return session
|
|
422
552
|
|
|
553
|
+
# format
|
|
554
|
+
def _guess_ext(self, metadata) -> str:
|
|
555
|
+
match metadata["original_format"]:
|
|
556
|
+
case "Custom.xml":
|
|
557
|
+
return "morph.xml"
|
|
423
558
|
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
|
|
427
|
-
import pycurl
|
|
559
|
+
case _:
|
|
560
|
+
_, ext = os.path.splitext(metadata["original_format"])
|
|
561
|
+
return ext[1:]
|
|
428
562
|
|
|
429
|
-
|
|
430
|
-
c = pycurl.Curl()
|
|
431
|
-
c.setopt(pycurl.URL, url)
|
|
432
|
-
c.setopt(pycurl.WRITEDATA, buffer)
|
|
433
|
-
c.setopt(pycurl.CAINFO, certifi.where())
|
|
434
|
-
c.setopt(pycurl.TIMEOUT, timeout)
|
|
435
|
-
if proxy is not None:
|
|
436
|
-
c.setopt(pycurl.PROXY, proxy)
|
|
437
|
-
c.perform()
|
|
563
|
+
# log helper
|
|
438
564
|
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
565
|
+
def _info(self, msg: str, *arg):
    """Log `msg` at INFO level and echo it to stdout when verbose.

    Parameters
    ----------
    msg : str
        %-style format string, as used by `logging`.
    *arg :
        Values interpolated into `msg`.
    """
    # stacklevel=2 attributes the record to the caller of this helper.
    logging.info(msg, *arg, stacklevel=2)
    if self.verbose:
        # Messages use %-placeholders (same as logging), so they must be
        # expanded with `%`; `str.format` would print them unexpanded.
        try:
            text = msg % arg if arg else msg
        except (TypeError, ValueError):
            # Fall back for brace-style messages instead of crashing.
            text = msg.format(*arg)
        print(text)
|
|
442
569
|
|
|
443
|
-
|
|
444
|
-
|
|
570
|
+
def _warning(self, msg: str, *arg):
    """Log `msg` at WARNING level and echo it to stdout when verbose.

    Parameters
    ----------
    msg : str
        %-style format string, as used by `logging`.
    *arg :
        Values interpolated into `msg`.
    """
    # stacklevel=2 attributes the record to the caller of this helper.
    logging.warning(msg, *arg, stacklevel=2)
    if self.verbose:
        # Messages use %-placeholders (same as logging), so they must be
        # expanded with `%`; `str.format` would print them unexpanded.
        try:
            text = msg % arg if arg else msg
        except (TypeError, ValueError):
            # Fall back for brace-style messages instead of crashing.
            text = msg.format(*arg)
        print(text)
|
|
445
574
|
|
|
446
575
|
|
|
447
576
|
if __name__ == "__main__":
|
|
@@ -451,6 +580,14 @@ if __name__ == "__main__":
|
|
|
451
580
|
sub = subparsers.add_parser("download")
|
|
452
581
|
sub.add_argument("-o", "--path", type=str)
|
|
453
582
|
sub.add_argument("--retry", type=int, default=3)
|
|
583
|
+
sub.add_argument("--metadata", type=bool, default=True)
|
|
584
|
+
sub.add_argument(
|
|
585
|
+
"--resources",
|
|
586
|
+
type=str,
|
|
587
|
+
nargs="*",
|
|
588
|
+
default=["morpho_cng"],
|
|
589
|
+
choices=["morpho_cng", "morpho_source", "log_cng", "log_source"],
|
|
590
|
+
)
|
|
454
591
|
sub.add_argument("--proxy", type=str, default=None)
|
|
455
592
|
sub.add_argument("--verbose", type=bool, default=True)
|
|
456
593
|
sub.set_defaults(func=download_neuromorpho)
|