swcgeom-0.19.4-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- swcgeom/__init__.py +21 -0
- swcgeom/analysis/__init__.py +13 -0
- swcgeom/analysis/feature_extractor.py +454 -0
- swcgeom/analysis/features.py +218 -0
- swcgeom/analysis/lmeasure.py +750 -0
- swcgeom/analysis/sholl.py +201 -0
- swcgeom/analysis/trunk.py +183 -0
- swcgeom/analysis/visualization.py +191 -0
- swcgeom/analysis/visualization3d.py +81 -0
- swcgeom/analysis/volume.py +143 -0
- swcgeom/core/__init__.py +19 -0
- swcgeom/core/branch.py +129 -0
- swcgeom/core/branch_tree.py +65 -0
- swcgeom/core/compartment.py +107 -0
- swcgeom/core/node.py +130 -0
- swcgeom/core/path.py +155 -0
- swcgeom/core/population.py +341 -0
- swcgeom/core/swc.py +247 -0
- swcgeom/core/swc_utils/__init__.py +19 -0
- swcgeom/core/swc_utils/assembler.py +35 -0
- swcgeom/core/swc_utils/base.py +180 -0
- swcgeom/core/swc_utils/checker.py +107 -0
- swcgeom/core/swc_utils/io.py +204 -0
- swcgeom/core/swc_utils/normalizer.py +163 -0
- swcgeom/core/swc_utils/subtree.py +70 -0
- swcgeom/core/tree.py +384 -0
- swcgeom/core/tree_utils.py +277 -0
- swcgeom/core/tree_utils_impl.py +58 -0
- swcgeom/images/__init__.py +9 -0
- swcgeom/images/augmentation.py +149 -0
- swcgeom/images/contrast.py +87 -0
- swcgeom/images/folder.py +217 -0
- swcgeom/images/io.py +578 -0
- swcgeom/images/loaders/__init__.py +8 -0
- swcgeom/images/loaders/pbd.cp311-win_amd64.pyd +0 -0
- swcgeom/images/loaders/pbd.pyx +523 -0
- swcgeom/images/loaders/raw.cp311-win_amd64.pyd +0 -0
- swcgeom/images/loaders/raw.pyx +183 -0
- swcgeom/transforms/__init__.py +20 -0
- swcgeom/transforms/base.py +136 -0
- swcgeom/transforms/branch.py +223 -0
- swcgeom/transforms/branch_tree.py +74 -0
- swcgeom/transforms/geometry.py +270 -0
- swcgeom/transforms/image_preprocess.py +107 -0
- swcgeom/transforms/image_stack.py +219 -0
- swcgeom/transforms/images.py +206 -0
- swcgeom/transforms/mst.py +183 -0
- swcgeom/transforms/neurolucida_asc.py +498 -0
- swcgeom/transforms/path.py +56 -0
- swcgeom/transforms/population.py +36 -0
- swcgeom/transforms/tree.py +265 -0
- swcgeom/transforms/tree_assembler.py +161 -0
- swcgeom/utils/__init__.py +18 -0
- swcgeom/utils/debug.py +23 -0
- swcgeom/utils/download.py +119 -0
- swcgeom/utils/dsu.py +58 -0
- swcgeom/utils/ellipse.py +131 -0
- swcgeom/utils/file.py +90 -0
- swcgeom/utils/neuromorpho.py +581 -0
- swcgeom/utils/numpy_helper.py +70 -0
- swcgeom/utils/plotter_2d.py +134 -0
- swcgeom/utils/plotter_3d.py +35 -0
- swcgeom/utils/renderer.py +145 -0
- swcgeom/utils/sdf.py +324 -0
- swcgeom/utils/solid_geometry.py +154 -0
- swcgeom/utils/transforms.py +367 -0
- swcgeom/utils/volumetric_object.py +483 -0
- swcgeom-0.19.4.dist-info/METADATA +86 -0
- swcgeom-0.19.4.dist-info/RECORD +72 -0
- swcgeom-0.19.4.dist-info/WHEEL +5 -0
- swcgeom-0.19.4.dist-info/licenses/LICENSE +201 -0
- swcgeom-0.19.4.dist-info/top_level.txt +1 -0

swcgeom/utils/neuromorpho.py
@@ -0,0 +1,581 @@
# SPDX-FileCopyrightText: 2022 - 2025 Zexin Yuan <pypi@yzx9.xyz>
#
# SPDX-License-Identifier: Apache-2.0

"""NeuroMorpho.org.

Metadata Example:

```json
{
    'neuron_id': 1,
    'neuron_name': 'cnic_001',
    'archive': 'Wearne_Hof',
    'note': 'When originally released, this reconstruction had been incompletely processed, and this issue was fixed in release 6.1 (May 2015). The pre-6.1 version of the processed file is available for download <a href=" dableFiles/previous/v6.1/wearne_hof/cnic_001.CNG.swc ">here</a>.',
    'age_scale': 'Year',
    'gender': 'Male/Female',
    'age_classification': 'old',
    'brain_region': ['neocortex', 'prefrontal', 'layer 3'],
    'cell_type': ['Local projecting', 'pyramidal', 'principal cell'],
    'species': 'monkey',
    'strain': 'Rhesus',
    'scientific_name': 'Macaca mulatta',
    'stain': 'lucifer yellow',
    'experiment_condition': ['Control'],
    'protocol': 'in vivo',
    'slicing_direction': 'custom',
    'reconstruction_software': 'Neurozoom',
    'objective_type': 'Not reported',
    'original_format': 'Neurozoom.swc',
    'domain': 'Dendrites, Soma, No Axon',
    'attributes': 'Diameter, 3D, Angles',
    'magnification': '100',
    'upload_date': '2006-08-01',
    'deposition_date': '2005-12-31',
    'shrinkage_reported': 'Reported',
    'shrinkage_corrected': 'Not Corrected',
    'reported_value': None,
    'reported_xy': None,
    'reported_z': None,
    'corrected_value': None,
    'corrected_xy': None,
    'corrected_z': None,
    'soma_surface': '834.0',
    'surface': '8842.91',
    'volume': '4725.89',
    'slicing_thickness': '400',
    'min_age': '24.0',
    'max_age': '25.0',
    'min_weight': '4500.0',
    'max_weight': '10000.0',
    'png_url': 'http://neuromorpho.org/images/imageFiles/Wearne_Hof/cnic_001.png',
    'reference_pmid': ['12204204', '12902394'],
    'reference_doi': ['10.1016/S0306-4522(02)00305-6', '10.1093/cercor/13.9.950'],
    'physical_Integrity': 'Dendrites Moderate',
    '_links': {
        'self': {
            'href': 'http://neuromorpho.org/api/neuron/id/1'
        },
        'measurements': {
            'href': 'http://neuromorpho.org/api/morphometry/id/1'
        },
        'persistence_vector': {
            'href': 'http://neuromorpho.org/api/pvec/id/1'
        }
    }
}
```

NOTE: All dependencies need to be installed; try:

```sh
pip install swcgeom[all]
```
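
A minimal end-to-end sketch (the local path is a placeholder; `where` and
`group_by` are optional, illustrative choices):

```python
from swcgeom.utils.neuromorpho import (
    download_neuromorpho,
    neuromorpho_convert_lmdb_to_swc,
    neuromorpho_is_valid,
)

download_neuromorpho("./neuromorpho", verbose=True)
neuromorpho_convert_lmdb_to_swc(
    "./neuromorpho", where=neuromorpho_is_valid, group_by="archive", verbose=True
)
```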
"""

import argparse
import io
import json
import logging
import math
import os
import urllib.parse
from collections.abc import Callable, Iterable
from typing import Any, Literal

from tqdm import tqdm

from swcgeom.utils import FileReader

__all__ = [
    "neuromorpho_is_valid",
    "neuromorpho_convert_lmdb_to_swc",
    "download_neuromorpho",
    "NeuroMorpho",
]


URL_BASE = "https://neuromorpho.org"
URL_METADATA = "api/neuron"
URL_MORPHO_CNG = "dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
URL_MORPHO_SOURCE = "dableFiles/$ARCHIVE/Source-Version/$NEURON.$EXT"
URL_LOG_CNG = "dableFiles/$ARCHIVE/Remaining%20issues/$NEURON.CNG.swc.std"
URL_LOG_SOURCE = "dableFiles/$ARCHIVE/Standardization%20log/$NEURON.std"
API_PAGE_SIZE_MAX = 500

KB = 1024
MB = 1024 * KB
GB = 1024 * MB

# Test version: 8.5.25 (2023-08-01)
# No ETAs for future version
# Size of metadata about 0.5 GB
# Size of morpho_cng about 18 GB
# Not sure about the size of others
SIZE_METADATA = 2 * GB
SIZE_DATA = 20 * GB

RESOURCES = Literal["morpho_cng", "morpho_source", "log_cng", "log_source"]
DOWNLOAD_CONFIGS: dict[RESOURCES, tuple[str, int]] = {
    # name/path: (url, size)
    "morpho_cng": (URL_MORPHO_CNG, 20 * GB),
"morpho_source": (URL_LOG_CNG, 512 * GB),
|
|
123
|
+
"log_cng": (URL_LOG_CNG, 512 * GB),
|
|
124
|
+
"log_source": (URL_LOG_SOURCE, 512 * GB),
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
# fmt: off
|
|
128
|
+
# Test version: 8.5.25 (2023-08-01)
|
|
129
|
+
# No ETAs for future version
|
|
130
|
+
invalid_ids = [
|
|
131
|
+
# bad file
|
|
132
|
+
81062, 86970, 79791,
|
|
133
|
+
|
|
134
|
+
33294, # bad tree with multi root
|
|
135
|
+
268441, # invalid type `-1` in L5467
|
|
136
|
+
|
|
137
|
+
# # 404 not found
|
|
138
|
+
# # We don't mark these ids, since they will throw a warning when
|
|
139
|
+
# # downloading and converting, so that users can find out as early
|
|
140
|
+
# # as possible, and can recover immediately when the website fixes
|
|
141
|
+
# # this problem.
|
|
142
|
+
# 97058, 98302, 125801, 130581, 267258, 267259, 267261, 267772,
|
|
143
|
+
# 267773, 268284, 268285, 268286
|
|
144
|
+
]
|
|
145
|
+
# fmt: on
|
|
146
|
+
|
|
147
|
+
|
|
148
|
+
def neuromorpho_is_valid(metadata: dict[str, Any]) -> bool:
|
|
149
|
+
return metadata["neuron_id"] not in invalid_ids


def neuromorpho_convert_lmdb_to_swc(
    root: str, dest: str | None = None, *, verbose: bool = False, **kwargs
) -> None:
    nmo = NeuroMorpho(root, verbose=verbose)
    nmo.convert_lmdb_to_swc(dest, **kwargs)


def download_neuromorpho(path: str, *, verbose: bool = False, **kwargs) -> None:
    nmo = NeuroMorpho(path, verbose=verbose)
    nmo.download(**kwargs)


class NeuroMorpho:
    def __init__(
        self, root: str, *, url_base: str = URL_BASE, verbose: bool = False
    ) -> None:
        """
        Args:
            root: Root directory for the local data.
            verbose: Show verbose log.
        """

        super().__init__()
        self.root = root
        self.url_base = url_base
        self.verbose = verbose

    def download(
        self,
        *,
        retry: int = 3,
        metadata: bool = True,
        resources: Iterable[RESOURCES] = ["morpho_cng"],
        **kwargs,
    ) -> None:
        """Download data from neuromorpho.org."""

        # metadata
        path_m = os.path.join(self.root, "metadata")
        if metadata:
            err_pages = None
            for i in range(retry + 1):
                if err_pages is not None and len(err_pages) == 0:
                    break

                self._info("download metadata")
                if i != 0:
                    self._info("retry %d: %s", i, json.dumps(err_pages))

                err_pages = self._download_metadata(path_m, pages=err_pages, **kwargs)

            self._info("download metadata done")
            if err_pages is not None and len(err_pages) != 0:
                self._warning("fails to download metadata: %s", json.dumps(err_pages))
        else:
            self._info("skip download metadata")

        # file
        def dumps(keys: list[bytes]) -> str:
            return json.dumps([i.decode("utf-8") for i in keys])

        for name in resources:
            url, map_size = DOWNLOAD_CONFIGS[name]
            path = os.path.join(self.root, name)

            err_keys = None
            for i in range(retry + 1):
                if err_keys is not None and len(err_keys) == 0:
                    break

                self._info("download %s", name)
                if err_keys is not None:
                    self._info("retry %d: %s", i, dumps(err_keys))

                err_keys = self._download_files(
                    url, path, path_m, map_size=map_size, **kwargs
                )

            self._info("download %s done", name)
            if err_keys is not None and len(err_keys) != 0:
                self._warning("fails to download %s: %s", name, dumps(err_keys))

    # pylint: disable-next=too-many-locals
    def convert_lmdb_to_swc(
        self,
        dest: str | None = None,
        *,
        group_by: str | Callable[[dict[str, Any]], str | None] | None = None,
        where: Callable[[dict[str, Any]], bool] | None = None,
        encoding: str | None = "utf-8",
    ) -> None:
r"""Convert lmdb format to SWCs.
|
|
244
|
+
|
|
245
|
+
NOTE: We are asserting the following folder.
|
|
246
|
+
|
|
247
|
+
```text
|
|
248
|
+
|- root
|
|
249
|
+
| |- metadata # input
|
|
250
|
+
| |- morpho_cng # input
|
|
251
|
+
| |- swc # output
|
|
252
|
+
| | |- groups # output of groups if grouped
|
|
253
|
+
```
|
|
254
|
+
|
|
255
|
+
Args:
|
|
256
|
+
path: str
|
|
257
|
+
dest: If None, use `path/swc`.
|
|
258
|
+
group_by: Group neurons by metadata.
|
|
259
|
+
If None, no grouping. If a string is entered, use it as a metadata
|
|
260
|
+
attribute name for grouping, e.g.: `archive`, `species`. If a callable
|
|
261
|
+
is entered, use it as a function `(metadata: dict[str, Any]) -> str | None\
|
|
262
|
+
to get the group name.
|
|
263
|
+
where: Filter neurons by metadata.
|
|
264
|
+
(metadata: dict[str, Any]) -> bool
|
|
265
|
+
encoding: Change swc encoding, part of the original data is not utf-8 encoded.
|
|
266
|
+
If is None, keep the original encoding format.default to `utf-8`
|
|
267
|
+
verbose: Print verbose info.
|
|
268
|
+
|
|
269
|
+
See Also:
|
|
270
|
+
neuromorpho_is_valid:
|
|
271
|
+
Recommended filter function, try `where=neuromorpho_is_valid`
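
        Example:
            A sketch; the path is a placeholder and the `group_by` callable is
            illustrative:

            ```python
            nmo = NeuroMorpho("./neuromorpho", verbose=True)
            nmo.convert_lmdb_to_swc(
                group_by=lambda m: m["species"], where=neuromorpho_is_valid
            )
            ```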
        """

        import lmdb

        env_m = lmdb.Environment(os.path.join(self.root, "metadata"), readonly=True)
        with env_m.begin() as tx_m:
            where = where or (lambda _: True)
            if isinstance(group_by, str):
                key = group_by

                def group_by_key(v):
                    return v[key]

                group_by = group_by_key

            elif group_by is None:

                def no_group(v):
                    return None

                group_by = no_group

            items = []
            for k, v in tx_m.cursor():
                metadata = json.loads(v)
                if where(metadata):
                    items.append((k, group_by(metadata)))

        env_m.close()

        dest = dest or os.path.join(self.root, "swc")
        os.makedirs(dest, exist_ok=True)
        for grp in set(grp for _, grp in items if grp is not None):
            os.makedirs(os.path.join(dest, grp), exist_ok=True)

        env_c = lmdb.Environment(os.path.join(self.root, "morpho_cng"), readonly=True)
        with env_c.begin() as tx_c:
            for k, grp in tqdm(items) if self.verbose else items:
                kk = k.decode("utf-8")
                try:
                    bs = tx_c.get(k)
                    if bs is None:
                        self._warning("morpho_cng of '%s' does not exist", kk)
                        continue

                    fs = (
                        os.path.join(dest, grp, f"{kk}.swc")
                        if grp is not None
                        else os.path.join(dest, f"{kk}.swc")
                    )

                    if encoding is None:
                        with open(fs, "wb") as f:
                            f.write(bs)
                    else:
                        bs = io.BytesIO(bs)
                        with (
                            open(fs, "w", encoding=encoding) as fw,
                            FileReader(bs, encoding="detect") as fr,
                        ):
                            fw.writelines(fr.readlines())
                except (IOError, lmdb.Error) as e:
                    self._warning("fails to convert %s, err: %s", kk, e)

        env_c.close()

    # Downloader

    def _download_metadata(
        self,
        path: str,
        *,
        pages: Iterable[int] | None = None,
        page_size: int = API_PAGE_SIZE_MAX,
        **kwargs,
    ) -> list[int]:
        r"""Download all neuron metadata.

        Args:
            path: Path to save data.
            pages: If None, download all pages.
            **kwargs: Forwarding to `get`.

        Returns:
            err_pages: Failed pages.
        """

        # TODO: how to cache between versions?
        import lmdb

        env = lmdb.Environment(path, map_size=SIZE_METADATA)
        if pages is None:
            res = self._get_metadata(page=0, page_size=1, **kwargs)
            total = res["page"]["totalElements"]
            pages = range(math.ceil(total / page_size))

        err_pages = []
        for page in tqdm(pages) if self.verbose else pages:
            try:
                res = self._get_metadata(page, page_size=page_size, **kwargs)
                with env.begin(write=True) as tx:
                    for neuron in res["_embedded"]["neuronResources"]:
                        k = str(neuron["neuron_id"]).encode("utf-8")
                        v = json.dumps(neuron).encode("utf-8")
                        tx.put(key=k, value=v)
            except IOError as e:
                err_pages.append(page)
                self._warning("fails to get metadata of page %s, err: %s", page, e)

        env.close()
        return err_pages

    # pylint: disable-next=too-many-locals
    def _download_files(
        self,
        url: str,
        path: str,
        path_metadata: str,
        *,
        keys: Iterable[bytes] | None = None,
        override: bool = False,
        map_size: int = 512 * GB,
        **kwargs,
    ) -> list[bytes]:
        """Download files.

        Args:
            url: URL of file.
            path: Path to save data.
            path_metadata: Path to lmdb of metadata.
            keys: If given, the `override` option is ignored. If None, download all
                keys.
            override: Override even if the file already exists. Defaults to False.
            map_size: Map size of lmdb. Defaults to 512 GB.
            **kwargs: Forwarding to `get`.

        Returns:
            err_keys: Failed keys.
        """

        import lmdb

        env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
        env_c = lmdb.Environment(path, map_size=map_size)
        if keys is None:
            with env_m.begin() as tx_m:
                if override:
                    keys = [k for k, _ in tx_m.cursor()]
                else:
                    with env_c.begin() as tx:
                        keys = [k for k, _ in tx_m.cursor() if tx.get(k) is None]

        err_keys = []
        for k in tqdm(keys) if self.verbose else keys:
            try:
                with env_m.begin() as tx:
                    metadata = json.loads(tx.get(k).decode("utf-8"))

                swc = self._get_file(url, metadata, **kwargs)
                with env_c.begin(write=True) as tx:
                    tx.put(key=k, value=swc)
            except IOError as e:
                err_keys.append(k)
                self._warning(
                    "fails to get morphology file `%s`, err: %s", k.decode("utf-8"), e
                )

        env_m.close()
        env_c.close()
        return err_keys

    def _get_metadata(
        self, page: int, page_size: int = API_PAGE_SIZE_MAX, **kwargs
    ) -> dict[str, Any]:
        params = {
            "page": page,
            "size": page_size,
            "sort": "neuron_id,neuron_id,asc",
        }
        query = "&".join([f"{k}={v}" for k, v in params.items()])
        url = f"{URL_METADATA}?{query}"
        resp = self._get(url, **kwargs)
        return json.loads(resp)

    def _get_file(self, url: str, metadata: dict[str, Any], **kwargs) -> bytes:
        """Get file.

        Returns:
            bs: Bytes of morphology file, encoding is NOT FIXED.
        """

        archive = urllib.parse.quote(metadata["archive"].lower())
        neuron = urllib.parse.quote(metadata["neuron_name"])
        ext = self._guess_ext(metadata)
        url = (
            url.replace("$ARCHIVE", archive)
            .replace("$NEURON", neuron)
            .replace("$EXT", ext)
        )
        return self._get(url, **kwargs)

    def _get(
        self, url: str, *, timeout: int = 2 * 60, proxy: str | None = None
    ) -> bytes:
        if not url.startswith("http://") and not url.startswith("https://"):
            url = urllib.parse.urljoin(self.url_base, url)

        proxies = None
        if proxy is not None:
            proxies = {"http": proxy, "https": proxy}

        response = self._session().get(url, timeout=timeout, proxies=proxies)
        response.raise_for_status()
        return response.content

    def _session(self) -> Any:
        if hasattr(self, "session"):
            return self.session

        import requests
        import requests.adapters
        import urllib3
        import urllib3.util

        class CustomSSLContextHTTPAdapter(requests.adapters.HTTPAdapter):
            def __init__(self, ssl_context=None, **kwargs):
                self.ssl_context = ssl_context
                super().__init__(**kwargs)

            def init_poolmanager(
                self, connections, maxsize, block=False, **pool_kwargs
            ):
                super().init_poolmanager(
                    connections,
                    maxsize,
                    block,
                    ssl_context=self.ssl_context,
                    **pool_kwargs,
                )

            def proxy_manager_for(self, proxy, **proxy_kwargs):
                return super().proxy_manager_for(
                    proxy, **proxy_kwargs, ssl_context=self.ssl_context
                )

        ctx = urllib3.util.create_urllib3_context()
        ctx.load_default_certs()
        # lower the OpenSSL security level; the server's legacy TLS setup is
        # rejected by modern defaults
        ctx.set_ciphers("DEFAULT@SECLEVEL=1")

        session = requests.session()
        session.adapters.pop("https://", None)
        session.mount("https://", CustomSSLContextHTTPAdapter(ssl_context=ctx))

        self.session = session
        return session

    # format
    def _guess_ext(self, metadata) -> str:
        match metadata["original_format"]:
            case "Custom.xml":
                return "morph.xml"

            case _:
                _, ext = os.path.splitext(metadata["original_format"])
                return ext[1:]

    # log helper

    def _info(self, msg: str, *arg):
        logging.info(msg, *arg, stacklevel=2)
        if self.verbose:
            print(msg % arg)

    def _warning(self, msg: str, *arg):
        logging.warning(msg, *arg, stacklevel=2)
        if self.verbose:
            print(msg % arg)


if __name__ == "__main__":
    parser = argparse.ArgumentParser(description="Use data from neuromorpho.org")
    subparsers = parser.add_subparsers(required=True)

    sub = subparsers.add_parser("download")
    sub.add_argument("-o", "--path", type=str)
    sub.add_argument("--retry", type=int, default=3)
    sub.add_argument("--metadata", type=bool, default=True)
    sub.add_argument(
        "--resources",
        type=str,
        nargs="*",
        default=["morpho_cng"],
        choices=["morpho_cng", "morpho_source", "log_cng", "log_source"],
    )
    sub.add_argument("--proxy", type=str, default=None)
    sub.add_argument("--verbose", type=bool, default=True)
    sub.set_defaults(func=download_neuromorpho)

    sub = subparsers.add_parser("convert")
    sub.add_argument("-i", "--root", type=str, required=True)
    sub.add_argument("-o", "--dest", type=str, default=None)
    sub.add_argument("--group_by", type=str, default=None)
    sub.add_argument("--encoding", type=str, default="utf-8")
    sub.add_argument("--verbose", type=bool, default=True)
    sub.set_defaults(func=neuromorpho_convert_lmdb_to_swc)

    args = parser.parse_args()
    func = args.func
    del args.func  # type: ignore
    func(**vars(args))
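
The `__main__` block above doubles as a small CLI. A usage sketch (paths are
placeholders; note that `type=bool` treats any non-empty string as true):

```sh
python -m swcgeom.utils.neuromorpho download -o ./neuromorpho
python -m swcgeom.utils.neuromorpho convert -i ./neuromorpho -o ./swc --group_by archive
```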

swcgeom/utils/numpy_helper.py
@@ -0,0 +1,70 @@
# SPDX-FileCopyrightText: 2022 - 2025 Zexin Yuan <pypi@yzx9.xyz>
#
# SPDX-License-Identifier: Apache-2.0

"""Numpy related utils."""

from contextlib import contextmanager
from typing import Any

import numpy as np
import numpy.typing as npt

__all__ = ["padding1d", "numpy_err"]


def padding1d(
    n: int,
    v: npt.ArrayLike | None,
    padding_value: Any = 0,
    dtype: npt.DTypeLike | None = None,
) -> npt.NDArray:
    """Padding x to array of shape (n,).

    >>> padding1d(5, [1, 2, 3])
    array([1., 2., 3., 0., 0.], dtype=float32)
    >>> padding1d(5, [1, 2, 3], padding_value=6)
    array([1., 2., 3., 6., 6.], dtype=float32)
    >>> padding1d(5, [1, 2, 3], dtype=np.int64)
    array([1, 2, 3, 0, 0])

    Args:
        n: Size of vector.
        v: Input vector.
        padding_value: Padding value.
            If v.shape[0] is less than n, the rest will be filled with the
            padding value.
        dtype: Data type of array.
            If specified, cast v to dtype; otherwise the dtype of v is used,
            defaulting to `~numpy.float32`.
    """

    if not isinstance(v, np.ndarray):
        dtype = dtype or np.float32
        if v is not None:
            v = np.array(v, dtype=dtype)
        else:
            v = np.zeros(n, dtype=dtype)

    if dtype is None:
        dtype = v.dtype

    if v.dtype != dtype:
        v = v.astype(dtype)

    assert v.ndim == 1

    if v.shape[0] >= n:
        return v[:n]

    padding = np.full(n - v.shape[0], padding_value, dtype=dtype)
    return np.concatenate([v, padding])


@contextmanager
def numpy_err(*args, **kwargs):
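    """Set numpy error handling within a scope, restoring it on exit.

    A thin wrapper around `np.seterr`; arguments are forwarded unchanged.
    A usage sketch:

    >>> with numpy_err(divide="raise"):
    ...     try:
    ...         np.float64(1.0) / np.float64(0.0)
    ...     except FloatingPointError:
    ...         print("caught divide by zero")
    caught divide by zero
    """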
    old_settings = np.seterr(*args, **kwargs)

    try:
        yield
    finally:
        np.seterr(**old_settings)