swcgeom 0.15.0__py3-none-any.whl → 0.18.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of swcgeom might be problematic. Click here for more details.

Files changed (72) hide show
  1. swcgeom/__init__.py +26 -1
  2. swcgeom/analysis/__init__.py +21 -8
  3. swcgeom/analysis/feature_extractor.py +43 -18
  4. swcgeom/analysis/features.py +250 -0
  5. swcgeom/analysis/lmeasure.py +857 -0
  6. swcgeom/analysis/sholl.py +55 -29
  7. swcgeom/analysis/trunk.py +27 -11
  8. swcgeom/analysis/visualization.py +24 -9
  9. swcgeom/analysis/visualization3d.py +100 -0
  10. swcgeom/analysis/volume.py +19 -4
  11. swcgeom/core/__init__.py +32 -9
  12. swcgeom/core/branch.py +28 -7
  13. swcgeom/core/branch_tree.py +18 -4
  14. swcgeom/core/{segment.py → compartment.py} +31 -10
  15. swcgeom/core/node.py +31 -10
  16. swcgeom/core/path.py +37 -10
  17. swcgeom/core/population.py +103 -34
  18. swcgeom/core/swc.py +26 -10
  19. swcgeom/core/swc_utils/__init__.py +21 -7
  20. swcgeom/core/swc_utils/assembler.py +27 -1
  21. swcgeom/core/swc_utils/base.py +25 -12
  22. swcgeom/core/swc_utils/checker.py +31 -14
  23. swcgeom/core/swc_utils/io.py +24 -7
  24. swcgeom/core/swc_utils/normalizer.py +20 -4
  25. swcgeom/core/swc_utils/subtree.py +17 -2
  26. swcgeom/core/tree.py +85 -72
  27. swcgeom/core/tree_utils.py +31 -16
  28. swcgeom/core/tree_utils_impl.py +18 -3
  29. swcgeom/images/__init__.py +17 -2
  30. swcgeom/images/augmentation.py +24 -4
  31. swcgeom/images/contrast.py +122 -0
  32. swcgeom/images/folder.py +97 -39
  33. swcgeom/images/io.py +108 -121
  34. swcgeom/transforms/__init__.py +28 -10
  35. swcgeom/transforms/base.py +17 -2
  36. swcgeom/transforms/branch.py +74 -8
  37. swcgeom/transforms/branch_tree.py +82 -0
  38. swcgeom/transforms/geometry.py +22 -7
  39. swcgeom/transforms/image_preprocess.py +115 -0
  40. swcgeom/transforms/image_stack.py +37 -13
  41. swcgeom/transforms/images.py +184 -7
  42. swcgeom/transforms/mst.py +20 -5
  43. swcgeom/transforms/neurolucida_asc.py +508 -0
  44. swcgeom/transforms/path.py +15 -0
  45. swcgeom/transforms/population.py +16 -3
  46. swcgeom/transforms/tree.py +89 -31
  47. swcgeom/transforms/tree_assembler.py +23 -7
  48. swcgeom/utils/__init__.py +27 -11
  49. swcgeom/utils/debug.py +15 -0
  50. swcgeom/utils/download.py +59 -21
  51. swcgeom/utils/dsu.py +15 -0
  52. swcgeom/utils/ellipse.py +18 -4
  53. swcgeom/utils/file.py +15 -0
  54. swcgeom/utils/neuromorpho.py +439 -302
  55. swcgeom/utils/numpy_helper.py +29 -4
  56. swcgeom/utils/plotter_2d.py +151 -0
  57. swcgeom/utils/plotter_3d.py +48 -0
  58. swcgeom/utils/renderer.py +49 -145
  59. swcgeom/utils/sdf.py +24 -8
  60. swcgeom/utils/solid_geometry.py +16 -3
  61. swcgeom/utils/transforms.py +17 -4
  62. swcgeom/utils/volumetric_object.py +23 -10
  63. {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/LICENSE +1 -1
  64. {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/METADATA +28 -24
  65. swcgeom-0.18.3.dist-info/RECORD +67 -0
  66. {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/WHEEL +1 -1
  67. swcgeom/_version.py +0 -16
  68. swcgeom/analysis/branch_features.py +0 -67
  69. swcgeom/analysis/node_features.py +0 -121
  70. swcgeom/analysis/path_features.py +0 -37
  71. swcgeom-0.15.0.dist-info/RECORD +0 -62
  72. {swcgeom-0.15.0.dist-info → swcgeom-0.18.3.dist-info}/top_level.txt +0 -0
@@ -1,9 +1,24 @@
1
+ # Copyright 2022-2025 Zexin Yuan
2
+ #
3
+ # Licensed under the Apache License, Version 2.0 (the "License");
4
+ # you may not use this file except in compliance with the License.
5
+ # You may obtain a copy of the License at
6
+ #
7
+ # http://www.apache.org/licenses/LICENSE-2.0
8
+ #
9
+ # Unless required by applicable law or agreed to in writing, software
10
+ # distributed under the License is distributed on an "AS IS" BASIS,
11
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12
+ # See the License for the specific language governing permissions and
13
+ # limitations under the License.
14
+
15
+
1
16
  """NeuroMorpho.org.
2
17
 
3
18
  Examples
4
19
  --------
5
20
 
6
- Metadata:
21
+ Metadata:
7
22
 
8
23
  ```json
9
24
  {
@@ -81,7 +96,10 @@ import logging
81
96
  import math
82
97
  import os
83
98
  import urllib.parse
84
- from typing import Any, Callable, Dict, Iterable, List, Optional
99
+ from collections.abc import Callable, Iterable
100
+ from typing import Any, Literal, Optional
101
+
102
+ from tqdm import tqdm
85
103
 
86
104
  from swcgeom.utils import FileReader
87
105
 
@@ -89,24 +107,39 @@ __all__ = [
89
107
  "neuromorpho_is_valid",
90
108
  "neuromorpho_convert_lmdb_to_swc",
91
109
  "download_neuromorpho",
110
+ "NeuroMorpho",
92
111
  ]
93
112
 
94
- URL_NEURON = "https://neuromorpho.org/api/neuron"
95
- URL_CNG_VERSION = (
96
- "https://neuromorpho.org/dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
97
- )
98
- API_NEURON_MAX_SIZE = 500
113
+
114
URL_BASE = "https://neuromorpho.org"
URL_METADATA = "api/neuron"
URL_MORPHO_CNG = "dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
URL_MORPHO_SOURCE = "dableFiles/$ARCHIVE/Source-Version/$NEURON.$EXT"
URL_LOG_CNG = "dableFiles/$ARCHIVE/Remaining%20issues/$NEURON.CNG.swc.std"
URL_LOG_SOURCE = "dableFiles/$ARCHIVE/Standardization%20log/$NEURON.std"
API_PAGE_SIZE_MAX = 500  # server-side maximum page size of the /api/neuron endpoint

KB = 1024
MB = 1024 * KB
GB = 1024 * MB

# Test version: 8.5.25 (2023-08-01)
# No ETAs for future version
# Size of metadata about 0.5 GB
# Size of morpho_cng about 18 GB
# Not sure about the size of others
SIZE_METADATA = 2 * GB
SIZE_DATA = 20 * GB

RESOURCES = Literal["morpho_cng", "morpho_source", "log_cng", "log_source"]
DOWNLOAD_CONFIGS: dict[RESOURCES, tuple[str, int]] = {
    # name/path: (url template, lmdb map_size)
    "morpho_cng": (URL_MORPHO_CNG, 20 * GB),
    # BUG FIX: was URL_LOG_CNG, which made the "morpho_source" resource
    # download the standardization log instead of the source-version files.
    "morpho_source": (URL_MORPHO_SOURCE, 512 * GB),
    "log_cng": (URL_LOG_CNG, 512 * GB),
    "log_source": (URL_LOG_SOURCE, 512 * GB),
}
142
+
110
143
  # fmt:off
111
144
  # Test version: 8.5.25 (2023-08-01)
112
145
  # No ETAs for future version
@@ -128,320 +161,416 @@ invalid_ids = [
128
161
  # fmt: on
129
162
 
130
163
 
131
- def neuromorpho_is_valid(metadata: Dict[str, Any]) -> bool:
164
def neuromorpho_is_valid(metadata: dict[str, Any]) -> bool:
    """Return True unless the neuron is in the known-invalid id list.

    ``invalid_ids`` is the module-level list of neuron ids whose files
    are known to be broken for the tested NeuroMorpho.org version.
    """
    return metadata["neuron_id"] not in invalid_ids
133
166
 
134
167
 
135
- # pylint: disable-next=too-many-locals
136
168
def neuromorpho_convert_lmdb_to_swc(
    root: str, dest: Optional[str] = None, *, verbose: bool = False, **kwargs
) -> None:
    """Convert a NeuroMorpho lmdb dump under `root` into SWC files.

    Thin wrapper kept for backward compatibility; see
    `NeuroMorpho.convert_lmdb_to_swc` for the supported keyword options.
    """
    helper = NeuroMorpho(root, verbose=verbose)
    helper.convert_lmdb_to_swc(dest, **kwargs)
173
+
174
+
175
def download_neuromorpho(path: str, *, verbose: bool = False, **kwargs) -> None:
    """Download NeuroMorpho.org data into `path`.

    Thin wrapper; see `NeuroMorpho.download` for the keyword options.
    """
    NeuroMorpho(path, verbose=verbose).download(**kwargs)
178
+
179
+
180
+ class NeuroMorpho:
181
    def __init__(
        self, root: str, *, url_base: str = URL_BASE, verbose: bool = False
    ) -> None:
        """
        Parameters
        ----------
        root : str
            Root directory that holds (or will hold) the lmdb databases.
        url_base : str, default `URL_BASE`
            Base URL of the server; relative resource paths are joined
            onto it when fetching.
        verbose : bool, default False
            Show verbose log.
        """

        super().__init__()
        self.root = root
        self.url_base = url_base
        self.verbose = verbose
196
+
197
+ def download(
198
+ self,
199
+ *,
200
+ retry: int = 3,
201
+ metadata: bool = True,
202
+ resources: Iterable[RESOURCES] = ["morpho_cng"],
203
+ **kwargs,
204
+ ) -> None:
205
+ """Download data from neuromorpho.org."""
206
+
207
+ # metadata
208
+ path_m = os.path.join(self.root, "metadata")
209
+ if metadata:
210
+ err_pages = None
211
+ for i in range(retry + 1):
212
+ if err_pages is not None and len(err_pages) == 0:
213
+ break
214
+
215
+ self._info("download metadata")
216
+ if i != 0:
217
+ self._info("retry %d: %s", i, json.dumps(err_pages))
218
+
219
+ err_pages = self._download_metadata(path_m, pages=err_pages, **kwargs)
220
+
221
+ self._info("download metadata done")
222
+ if err_pages is not None and len(err_pages) != 0:
223
+ self._warning("fails to download metadata: %s", json.dumps(err_pages))
224
+ else:
225
+ self._info("skip download metadata")
226
+
227
+ # file
228
+ def dumps(keys: list[bytes]) -> str:
229
+ return json.dumps([i.decode("utf-8") for i in keys])
230
+
231
+ for name in resources:
232
+ url, map_size = DOWNLOAD_CONFIGS[name]
233
+ path = os.path.join(self.root, name)
234
+
235
+ err_keys = None
236
+ for i in range(retry + 1):
237
+ if err_keys is not None and len(err_keys) == 0:
238
+ break
239
+
240
+ self._info("download %s", name)
241
+ if err_keys is not None:
242
+ self._info("retry %d: %s", i, dumps(err_keys))
243
+
244
+ err_keys = self._download_files(
245
+ url, path, path_m, map_size=map_size, **kwargs
221
246
  )
222
247
 
223
- if encoding is None:
224
- with open(fs, "wb") as f:
225
- f.write(bs) # type: ignore
248
+ self._info("download %s done", name)
249
+ if err_keys is not None and len(err_keys) != 0:
250
+ self._warning("fails to download %s: %s", name, dumps(err_keys))
251
+
252
    # pylint: disable-next=too-many-locals
    def convert_lmdb_to_swc(
        self,
        dest: Optional[str] = None,
        *,
        group_by: Optional[str | Callable[[dict[str, Any]], str | None]] = None,
        where: Optional[Callable[[dict[str, Any]], bool]] = None,
        encoding: str | None = "utf-8",
    ) -> None:
        r"""Convert lmdb format to SWCs.

        Parameters
        ----------
        dest : str, optional
            Output directory. If None, use `{self.root}/swc`.
        group_by : str | (metadata: dict[str, Any]) -> str | None, optional
            Group neurons by metadata. If a None is returned then no
            grouping. If a string is entered, use it as a metadata
            attribute name for grouping, e.g.: `archive`, `species`.
        where : (metadata: dict[str, Any]) -> bool, optional
            Filter neurons by metadata.
        encoding : str | None, default to `utf-8`
            Change swc encoding, part of the original data is not utf-8
            encoded. If is None, keep the original encoding format.

        Notes
        -----
        We are asserting the following folder.

        ```text
        |- root
        | |- metadata # input
        | |- morpho_cng # input
        | |- swc # output
        | | |- groups # output of groups if grouped
        ```

        See Also
        --------
        neuromorpho_is_valid :
            Recommended filter function, try `where=neuromorpho_is_valid`
        """

        import lmdb

        # pass 1: scan metadata, collect (key, group) pairs that pass `where`
        env_m = lmdb.Environment(os.path.join(self.root, "metadata"), readonly=True)
        with env_m.begin() as tx_m:
            where = where or (lambda _: True)
            if isinstance(group_by, str):
                key = group_by
                group_by = lambda v: v[key]  # pylint: disable=unnecessary-lambda-assignment
            elif group_by is None:
                group_by = lambda _: None  # pylint: disable=unnecessary-lambda-assignment
            items = []
            for k, v in tx_m.cursor():
                metadata = json.loads(v)
                if where(metadata):
                    items.append((k, group_by(metadata)))

        env_m.close()

        # create the output directory tree up front
        dest = dest or os.path.join(self.root, "swc")
        os.makedirs(dest, exist_ok=True)
        for grp in set(grp for _, grp in items if grp is not None):
            os.makedirs(os.path.join(dest, grp), exist_ok=True)

        # pass 2: dump each morphology, optionally re-encoding to `encoding`
        env_c = lmdb.Environment(os.path.join(self.root, "morpho_cng"), readonly=True)
        with env_c.begin() as tx_c:
            for k, grp in tqdm(items) if self.verbose else items:
                kk = k.decode("utf-8")
                try:
                    bs = tx_c.get(k)
                    if bs is None:
                        self._warning("morpho_cng of '%s' not exists", kk)
                        continue

                    fs = (
                        os.path.join(dest, grp, f"{kk}.swc")
                        if grp is not None
                        else os.path.join(dest, f"{kk}.swc")
                    )

                    if encoding is None:
                        # keep original bytes untouched
                        with open(fs, "wb") as f:
                            f.write(bs)  # type: ignore
                    else:
                        # detect the source encoding, rewrite as `encoding`
                        bs = io.BytesIO(bs)  # type: ignore
                        with (
                            open(fs, "w", encoding=encoding) as fw,
                            FileReader(bs, encoding="detect") as fr,
                        ):
                            fw.writelines(fr.readlines())
                except (IOError, lmdb.Error) as e:
                    self._warning("fails to convert of %s, err: %s", kk, e)

        env_c.close()
351
+
352
+ # Downloader
353
+
354
    def _download_metadata(
        self,
        path: str,
        *,
        pages: Optional[Iterable[int]] = None,
        page_size: int = API_PAGE_SIZE_MAX,
        **kwargs,
    ) -> list[int]:
        r"""Download all neuron metadata into an lmdb database at `path`.

        Parameters
        ----------
        path : str
            Path to save data.
        pages : List of int, optional
            If is None, download all pages.
        page_size : int, default `API_PAGE_SIZE_MAX`
            Neurons per API request.
        **kwargs :
            Forwarding to `_get`.

        Returns
        -------
        err_pages : List of int
            Failed pages.
        """

        # TODO: how to cache between versions?
        import lmdb

        env = lmdb.Environment(path, map_size=SIZE_METADATA)
        if pages is None:
            # probe with a single-element page to learn the total count
            res = self._get_metadata(page=0, page_size=1, **kwargs)
            total = res["page"]["totalElements"]
            pages = range(math.ceil(total / page_size))

        err_pages = []
        for page in tqdm(pages) if self.verbose else pages:
            try:
                res = self._get_metadata(page, page_size=page_size, **kwargs)
                with env.begin(write=True) as tx:
                    # key: neuron_id (utf-8), value: raw metadata JSON
                    for neuron in res["_embedded"]["neuronResources"]:
                        k = str(neuron["neuron_id"]).encode("utf-8")
                        v = json.dumps(neuron).encode("utf-8")
                        tx.put(key=k, value=v)
            except IOError as e:
                err_pages.append(page)
                self._warning("fails to get metadata of page %s, err: %s", page, e)

        env.close()
        return err_pages
405
+
406
    # pylint: disable-next=too-many-locals
    def _download_files(
        self,
        url: str,
        path: str,
        path_metadata: str,
        *,
        keys: Optional[Iterable[bytes]] = None,
        override: bool = False,
        map_size: int = 512 * GB,
        **kwargs,
    ) -> list[bytes]:
        """Download files.

        Parameters
        ----------
        url : str
            URL template with `$ARCHIVE` / `$NEURON` / `$EXT` placeholders.
        path : str
            Path to save data.
        path_metadata : str
            Path to lmdb of metadata.
        keys : List of bytes, optional
            If exist, ignore `override` option. If None, download all key.
        override : bool, default False
            Override even exists.
        map_size : int, default 512GB
            Maximum size of the target lmdb database.
        **kwargs :
            Forwarding to `_get`.

        Returns
        -------
        err_keys : List of bytes
            Failed keys.
        """

        import lmdb

        env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
        env_c = lmdb.Environment(path, map_size=map_size)
        if keys is None:
            with env_m.begin() as tx_m:
                if override:
                    keys = [k for k, v in tx_m.cursor()]
                else:
                    # only keys not already present in the target db
                    with env_c.begin() as tx:
                        keys = [k for k, v in tx_m.cursor() if tx.get(k) is None]

        err_keys = []
        for k in tqdm(keys) if self.verbose else keys:
            try:
                with env_m.begin() as tx:
                    metadata = json.loads(tx.get(k).decode("utf-8"))  # type: ignore

                swc = self._get_file(url, metadata, **kwargs)
                with env_c.begin(write=True) as tx:
                    tx.put(key=k, value=swc)
            except IOError as e:
                err_keys.append(k)
                self._warning(
                    "fails to get morphology file `%s`, err: %s", k.decode("utf-8"), e
                )

        env_m.close()
        env_c.close()
        return err_keys
471
+
472
+ def _get_metadata(
473
+ self, page: int, page_size: int = API_PAGE_SIZE_MAX, **kwargs
474
+ ) -> dict[str, Any]:
475
+ params = {
476
+ "page": page,
477
+ "size": page_size,
478
+ "sort": "neuron_id,neuron_id,asc",
479
+ }
480
+ query = "&".join([f"{k}={v}" for k, v in params.items()])
481
+ url = f"{URL_METADATA}?{query}"
482
+ resp = self._get(url, **kwargs)
483
+ return json.loads(resp)
484
+
485
+ def _get_file(self, url: str, metadata: dict[str, Any], **kwargs) -> bytes:
486
+ """Get file.
487
+
488
+ Returns
489
+ -------
490
+ bs : bytes
491
+ Bytes of morphology file, encoding is NOT FIXED.
492
+ """
493
+
494
+ archive = urllib.parse.quote(metadata["archive"].lower())
495
+ neuron = urllib.parse.quote(metadata["neuron_name"])
496
+ ext = self._guess_ext(metadata)
497
+ url = (
498
+ url.replace("$ARCHIVE", archive)
499
+ .replace("$NEURON", neuron)
500
+ .replace("$EXT", ext)
278
501
  )
502
+ return self._get(url, **kwargs)
503
+
504
+ def _get(
505
+ self, url: str, *, timeout: int = 2 * 60, proxy: Optional[str] = None
506
+ ) -> bytes:
507
+ if not url.startswith("http://") and not url.startswith("https://"):
508
+ url = urllib.parse.urljoin(self.url_base, url)
509
+
510
+ proxies = None
511
+ if proxy is not None:
512
+ proxies = {"http": proxy, "https": proxy}
513
+
514
+ response = self._session().get(url, timeout=timeout, proxies=proxies)
515
+ response.raise_for_status()
516
+ return response.content
517
+
518
    def _session(self) -> Any:
        """Return a lazily-created, cached `requests.Session`.

        The session mounts an HTTPS adapter with a relaxed SSL context
        (`DEFAULT@SECLEVEL=1`) — presumably because neuromorpho.org serves a
        legacy TLS configuration rejected at OpenSSL's default security
        level; confirm before tightening.
        """
        # reuse one session across requests (connection pooling, one context)
        if hasattr(self, "session"):
            return self.session

        import requests
        import requests.adapters
        import urllib3
        import urllib3.util

        class CustomSSLContextHTTPAdapter(requests.adapters.HTTPAdapter):
            # HTTPAdapter that injects a caller-supplied ssl_context into
            # both its direct pools and its proxy pools
            def __init__(self, ssl_context=None, **kwargs):
                self.ssl_context = ssl_context
                super().__init__(**kwargs)

            def init_poolmanager(self, connections, maxsize, block=False):
                super().init_poolmanager(
                    connections, maxsize, block, ssl_context=self.ssl_context
                )

            def proxy_manager_for(self, proxy, **proxy_kwargs):
                return super().proxy_manager_for(
                    proxy, **proxy_kwargs, ssl_context=self.ssl_context
                )

        ctx = urllib3.util.create_urllib3_context()
        ctx.load_default_certs()
        ctx.set_ciphers("DEFAULT@SECLEVEL=1")  # accept weaker server ciphers

        session = requests.session()
        # swap the default https adapter for the custom-context one
        session.adapters.pop("https://", None)
        session.mount("https://", CustomSSLContextHTTPAdapter(ssl_context=ctx))

        self.session = session
        return session
422
552
 
553
+ # format
554
+ def _guess_ext(self, metadata) -> str:
555
+ match metadata["original_format"]:
556
+ case "Custom.xml":
557
+ return "morph.xml"
423
558
 
424
- def get(url: str, *, timeout: int = 2 * 60, proxy: Optional[str] = None) -> bytes:
425
- # pylint: disable=c-extension-no-member
426
- import certifi
427
- import pycurl
559
+ case _:
560
+ _, ext = os.path.splitext(metadata["original_format"])
561
+ return ext[1:]
428
562
 
429
- buffer = io.BytesIO()
430
- c = pycurl.Curl()
431
- c.setopt(pycurl.URL, url)
432
- c.setopt(pycurl.WRITEDATA, buffer)
433
- c.setopt(pycurl.CAINFO, certifi.where())
434
- c.setopt(pycurl.TIMEOUT, timeout)
435
- if proxy is not None:
436
- c.setopt(pycurl.PROXY, proxy)
437
- c.perform()
563
+ # log helper
438
564
 
439
- code = c.getinfo(pycurl.RESPONSE_CODE)
440
- if code != 200:
441
- raise ConnectionError(f"fails to fetch data, status: {code}")
565
+ def _info(self, msg: str, *arg):
566
+ logging.info(msg, *arg, stacklevel=2)
567
+ if self.verbose:
568
+ print(msg.format(*arg))
442
569
 
443
- c.close()
444
- return buffer.getvalue()
570
+ def _warning(self, msg: str, *arg):
571
+ logging.warning(msg, *arg, stacklevel=2)
572
+ if self.verbose:
573
+ print(msg.format(*arg))
445
574
 
446
575
 
447
576
  if __name__ == "__main__":
@@ -451,6 +580,14 @@ if __name__ == "__main__":
451
580
  sub = subparsers.add_parser("download")
452
581
  sub.add_argument("-o", "--path", type=str)
453
582
  sub.add_argument("--retry", type=int, default=3)
583
+ sub.add_argument("--metadata", type=bool, default=True)
584
+ sub.add_argument(
585
+ "--resources",
586
+ type=str,
587
+ nargs="*",
588
+ default=["morpho_cng"],
589
+ choices=["morpho_cng", "morpho_source", "log_cng", "log_source"],
590
+ )
454
591
  sub.add_argument("--proxy", type=str, default=None)
455
592
  sub.add_argument("--verbose", type=bool, default=True)
456
593
  sub.set_defaults(func=download_neuromorpho)