swcgeom 0.15.0__py3-none-any.whl → 0.17.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of swcgeom might be problematic. Click here for more details.

Files changed (42) hide show
  1. swcgeom/_version.py +2 -2
  2. swcgeom/analysis/__init__.py +1 -3
  3. swcgeom/analysis/feature_extractor.py +3 -3
  4. swcgeom/analysis/{node_features.py → features.py} +105 -3
  5. swcgeom/analysis/lmeasure.py +821 -0
  6. swcgeom/analysis/sholl.py +31 -2
  7. swcgeom/core/__init__.py +4 -0
  8. swcgeom/core/branch.py +9 -4
  9. swcgeom/core/{segment.py → compartment.py} +14 -9
  10. swcgeom/core/node.py +0 -8
  11. swcgeom/core/path.py +21 -6
  12. swcgeom/core/population.py +47 -7
  13. swcgeom/core/swc_utils/assembler.py +12 -1
  14. swcgeom/core/swc_utils/base.py +12 -5
  15. swcgeom/core/swc_utils/checker.py +12 -2
  16. swcgeom/core/tree.py +34 -37
  17. swcgeom/core/tree_utils.py +4 -0
  18. swcgeom/images/augmentation.py +6 -1
  19. swcgeom/images/contrast.py +107 -0
  20. swcgeom/images/folder.py +71 -14
  21. swcgeom/images/io.py +74 -88
  22. swcgeom/transforms/__init__.py +2 -0
  23. swcgeom/transforms/image_preprocess.py +100 -0
  24. swcgeom/transforms/image_stack.py +1 -4
  25. swcgeom/transforms/images.py +176 -5
  26. swcgeom/transforms/mst.py +5 -5
  27. swcgeom/transforms/neurolucida_asc.py +495 -0
  28. swcgeom/transforms/tree.py +5 -1
  29. swcgeom/utils/__init__.py +1 -0
  30. swcgeom/utils/neuromorpho.py +425 -300
  31. swcgeom/utils/numpy_helper.py +14 -4
  32. swcgeom/utils/plotter_2d.py +130 -0
  33. swcgeom/utils/renderer.py +28 -139
  34. swcgeom/utils/sdf.py +5 -1
  35. {swcgeom-0.15.0.dist-info → swcgeom-0.17.0.dist-info}/METADATA +3 -3
  36. swcgeom-0.17.0.dist-info/RECORD +65 -0
  37. {swcgeom-0.15.0.dist-info → swcgeom-0.17.0.dist-info}/WHEEL +1 -1
  38. swcgeom/analysis/branch_features.py +0 -67
  39. swcgeom/analysis/path_features.py +0 -37
  40. swcgeom-0.15.0.dist-info/RECORD +0 -62
  41. {swcgeom-0.15.0.dist-info → swcgeom-0.17.0.dist-info}/LICENSE +0 -0
  42. {swcgeom-0.15.0.dist-info → swcgeom-0.17.0.dist-info}/top_level.txt +0 -0
@@ -81,7 +81,9 @@ import logging
81
81
  import math
82
82
  import os
83
83
  import urllib.parse
84
- from typing import Any, Callable, Dict, Iterable, List, Optional
84
+ from typing import Any, Callable, Dict, Iterable, List, Literal, Optional, Tuple
85
+
86
+ from tqdm import tqdm
85
87
 
86
88
  from swcgeom.utils import FileReader
87
89
 
@@ -89,24 +91,39 @@ __all__ = [
89
91
  "neuromorpho_is_valid",
90
92
  "neuromorpho_convert_lmdb_to_swc",
91
93
  "download_neuromorpho",
94
+ "NeuroMorpho",
92
95
  ]
93
96
 
94
- URL_NEURON = "https://neuromorpho.org/api/neuron"
95
- URL_CNG_VERSION = (
96
- "https://neuromorpho.org/dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
97
- )
98
- API_NEURON_MAX_SIZE = 500
97
+
98
+ URL_BASE = "https://neuromorpho.org"
99
+ URL_METADATA = "api/neuron"
100
+ URL_MORPHO_CNG = "dableFiles/$ARCHIVE/CNG%20version/$NEURON.CNG.swc"
101
+ URL_MORPHO_SOURCE = "dableFiles/$ARCHIVE/Source-Version/$NEURON.$EXT"
102
+ URL_LOG_CNG = "dableFiles/$ARCHIVE/Remaining%20issues/$NEURON.CNG.swc.std"
103
+ URL_LOG_SOURCE = "dableFiles/$ARCHIVE/Standardization%20log/$NEURON.std"
104
+ API_PAGE_SIZE_MAX = 500
99
105
 
100
106
  KB = 1024
101
107
  MB = 1024 * KB
102
108
  GB = 1024 * MB
103
109
 
104
110
  # Test version: 8.5.25 (2023-08-01)
105
- # About 1.1 GB and 18 GB
106
111
  # No ETAs for future version
112
+ # Size of metadata about 0.5 GB
113
+ # Size of morpho_cng about 18 GB
114
+ # Not sure about the size of others
107
115
  SIZE_METADATA = 2 * GB
108
116
  SIZE_DATA = 20 * GB
109
117
 
118
+ RESOURCES = Literal["morpho_cng", "morpho_source", "log_cng", "log_source"]
119
+ DOWNLOAD_CONFIGS: Dict[RESOURCES, Tuple[str, int]] = {
120
+ # name/path: (url, size)
121
+ "morpho_cng": (URL_MORPHO_CNG, 20 * GB),
122
+ "morpho_source": (URL_MORPHO_SOURCE, 512 * GB),
123
+ "log_cng": (URL_LOG_CNG, 512 * GB),
124
+ "log_source": (URL_LOG_SOURCE, 512 * GB),
125
+ }
126
+
110
127
  # fmt:off
111
128
  # Test version: 8.5.25 (2023-08-01)
112
129
  # No ETAs for future version
@@ -132,316 +149,416 @@ def neuromorpho_is_valid(metadata: Dict[str, Any]) -> bool:
132
149
  return metadata["neuron_id"] not in invalid_ids
133
150
 
134
151
 
135
- # pylint: disable-next=too-many-locals
136
152
  def neuromorpho_convert_lmdb_to_swc(
137
- root: str,
138
- dest: Optional[str] = None,
139
- *,
140
- group_by: Optional[str | Callable[[Dict[str, Any]], str | None]] = None,
141
- where: Optional[Callable[[Dict[str, Any]], bool]] = None,
142
- encoding: str | None = "utf-8",
143
- verbose: bool = False,
153
+ root: str, dest: Optional[str] = None, *, verbose: bool = False, **kwargs
144
154
  ) -> None:
145
- """Convert lmdb format to SWCs.
146
-
147
- Parameters
148
- ----------
149
- path : str
150
- dest : str, optional
151
- If None, use `path/swc`.
152
- group_by : str | (metadata: Dict[str, Any]) -> str | None, optional
153
- Group neurons by metadata. If a None is returned then no
154
- grouping. If a string is entered, use it as a metadata
155
- attribute name for grouping, e.g.: `archive`, `species`.
156
- where : (metadata: Dict[str, Any]) -> bool, optional
157
- Filter neurons by metadata.
158
- encoding : str | None, default to `utf-8`
159
- Change swc encoding, part of the original data is not utf-8
160
- encoded. If is None, keep the original encoding format.
161
- verbose : bool, default False
162
- Print verbose info.
163
-
164
- Notes
165
- -----
166
- We are asserting the following folder.
167
-
168
- ```text
169
- |- root
170
- | |- metadata # input
171
- | |- cng_version # input
172
- | |- swc # output
173
- | | |- groups # output of groups if grouped
174
- ```
175
-
176
- See Also
177
- --------
178
- neuromorpho_is_valid :
179
- Recommended filter function, try `where=neuromorpho_is_valid`
180
- """
181
- import lmdb
182
- from tqdm import tqdm
183
-
184
- assert os.path.exists(root)
185
-
186
- env_m = lmdb.Environment(os.path.join(root, "metadata"), readonly=True)
187
- with env_m.begin() as tx_m:
188
- where = where or (lambda _: True)
189
- if isinstance(group_by, str):
190
- key = group_by
191
- group_by = lambda v: v[key] # pylint: disable=unnecessary-lambda-assignment
192
- elif group_by is None:
193
- group_by = lambda _: None # pylint: disable=unnecessary-lambda-assignment
194
- items = []
195
- for k, v in tx_m.cursor():
196
- metadata = json.loads(v)
197
- if where(metadata):
198
- items.append((k, group_by(metadata)))
199
-
200
- env_m.close()
201
-
202
- dest = dest or os.path.join(root, "swc")
203
- os.makedirs(dest, exist_ok=True)
204
- for grp in set(grp for _, grp in items if grp is not None):
205
- os.makedirs(os.path.join(dest, grp), exist_ok=True)
206
-
207
- env_c = lmdb.Environment(os.path.join(root, "cng_version"), readonly=True)
208
- with env_c.begin() as tx_c:
209
- for k, grp in tqdm(items) if verbose else items:
210
- kk = k.decode("utf-8")
211
- try:
212
- bs = tx_c.get(k)
213
- if bs is None:
214
- logging.warning("cng version of '%s' not exists", kk)
215
- continue
216
-
217
- fs = (
218
- os.path.join(dest, grp, f"{kk}.swc")
219
- if grp is not None
220
- else os.path.join(dest, f"{kk}.swc")
155
+ nmo = NeuroMorpho(root, verbose=verbose)
156
+ nmo.convert_lmdb_to_swc(dest, **kwargs)
157
+
158
+
159
+ def download_neuromorpho(path: str, *, verbose: bool = False, **kwargs) -> None:
160
+ nmo = NeuroMorpho(path, verbose=verbose)
161
+ nmo.download(**kwargs)
162
+
163
+
164
+ class NeuroMorpho:
165
+ def __init__(
166
+ self, root: str, *, url_base: str = URL_BASE, verbose: bool = False
167
+ ) -> None:
168
+ """
169
+ Parameters
170
+ ----------
171
+ root : str
172
+ verbose : bool, default False
173
+ Show verbose log.
174
+ """
175
+
176
+ super().__init__()
177
+ self.root = root
178
+ self.url_base = url_base
179
+ self.verbose = verbose
180
+
181
+ def download(
182
+ self,
183
+ *,
184
+ retry: int = 3,
185
+ metadata: bool = True,
186
+ resources: Iterable[RESOURCES] = ["morpho_cng"],
187
+ **kwargs,
188
+ ) -> None:
189
+ """Download data from neuromorpho.org."""
190
+
191
+ # metadata
192
+ path_m = os.path.join(self.root, "metadata")
193
+ if metadata:
194
+ err_pages = None
195
+ for i in range(retry + 1):
196
+ if err_pages is not None and len(err_pages) == 0:
197
+ break
198
+
199
+ self._info("download metadata")
200
+ if i != 0:
201
+ self._info("retry %d: %s", i, json.dumps(err_pages))
202
+
203
+ err_pages = self._download_metadata(path_m, pages=err_pages, **kwargs)
204
+
205
+ self._info("download metadata done")
206
+ if err_pages is not None and len(err_pages) != 0:
207
+ self._warning("fails to download metadata: %s", json.dumps(err_pages))
208
+ else:
209
+ self._info("skip download metadata")
210
+
211
+ # file
212
+ def dumps(keys: List[bytes]) -> str:
213
+ return json.dumps([i.decode("utf-8") for i in keys])
214
+
215
+ for name in resources:
216
+ url, map_size = DOWNLOAD_CONFIGS[name]
217
+ path = os.path.join(self.root, name)
218
+
219
+ err_keys = None
220
+ for i in range(retry + 1):
221
+ if err_keys is not None and len(err_keys) == 0:
222
+ break
223
+
224
+ self._info("download %s", name)
225
+ if err_keys is not None:
226
+ self._info("retry %d: %s", i, dumps(err_keys))
227
+
228
+ err_keys = self._download_files(
229
+ url, path, path_m, keys=err_keys, map_size=map_size, **kwargs
221
230
  )
222
231
 
223
- if encoding is None:
224
- with open(fs, "wb") as f:
225
- f.write(bs) # type: ignore
232
+ self._info("download %s done", name)
233
+ if err_keys is not None and len(err_keys) != 0:
234
+ self._warning("fails to download %s: %s", name, dumps(err_keys))
235
+
236
+ # pylint: disable-next=too-many-locals
237
+ def convert_lmdb_to_swc(
238
+ self,
239
+ dest: Optional[str] = None,
240
+ *,
241
+ group_by: Optional[str | Callable[[Dict[str, Any]], str | None]] = None,
242
+ where: Optional[Callable[[Dict[str, Any]], bool]] = None,
243
+ encoding: str | None = "utf-8",
244
+ ) -> None:
245
+ r"""Convert lmdb format to SWCs.
246
+
247
+ Parameters
248
+ ----------
249
+ path : str
250
+ dest : str, optional
251
+ If None, use `path/swc`.
252
+ group_by : str | (metadata: Dict[str, Any]) -> str | None, optional
253
+ Group neurons by metadata. If a None is returned then no
254
+ grouping. If a string is entered, use it as a metadata
255
+ attribute name for grouping, e.g.: `archive`, `species`.
256
+ where : (metadata: Dict[str, Any]) -> bool, optional
257
+ Filter neurons by metadata.
258
+ encoding : str | None, default to `utf-8`
259
+ Change swc encoding, part of the original data is not utf-8
260
+ encoded. If is None, keep the original encoding format.
261
+ verbose : bool, default False
262
+ Print verbose info. Note: configured via the `NeuroMorpho` constructor, not as a parameter of this method.
263
+
264
+ Notes
265
+ -----
266
+ We are asserting the following folder.
267
+
268
+ ```text
269
+ |- root
270
+ | |- metadata # input
271
+ | |- morpho_cng # input
272
+ | |- swc # output
273
+ | | |- groups # output of groups if grouped
274
+ ```
275
+
276
+ See Also
277
+ --------
278
+ neuromorpho_is_valid :
279
+ Recommended filter function, try `where=neuromorpho_is_valid`
280
+ """
281
+
282
+ import lmdb
283
+
284
+ env_m = lmdb.Environment(os.path.join(self.root, "metadata"), readonly=True)
285
+ with env_m.begin() as tx_m:
286
+ where = where or (lambda _: True)
287
+ if isinstance(group_by, str):
288
+ key = group_by
289
+ group_by = lambda v: v[
290
+ key
291
+ ] # pylint: disable=unnecessary-lambda-assignment
292
+ elif group_by is None:
293
+ group_by = (
294
+ lambda _: None
295
+ ) # pylint: disable=unnecessary-lambda-assignment
296
+ items = []
297
+ for k, v in tx_m.cursor():
298
+ metadata = json.loads(v)
299
+ if where(metadata):
300
+ items.append((k, group_by(metadata)))
301
+
302
+ env_m.close()
303
+
304
+ dest = dest or os.path.join(self.root, "swc")
305
+ os.makedirs(dest, exist_ok=True)
306
+ for grp in set(grp for _, grp in items if grp is not None):
307
+ os.makedirs(os.path.join(dest, grp), exist_ok=True)
308
+
309
+ env_c = lmdb.Environment(os.path.join(self.root, "morpho_cng"), readonly=True)
310
+ with env_c.begin() as tx_c:
311
+ for k, grp in tqdm(items) if self.verbose else items:
312
+ kk = k.decode("utf-8")
313
+ try:
314
+ bs = tx_c.get(k)
315
+ if bs is None:
316
+ self._warning("morpho_cng of '%s' not exists", kk)
317
+ continue
318
+
319
+ fs = (
320
+ os.path.join(dest, grp, f"{kk}.swc")
321
+ if grp is not None
322
+ else os.path.join(dest, f"{kk}.swc")
323
+ )
324
+
325
+ if encoding is None:
326
+ with open(fs, "wb") as f:
327
+ f.write(bs) # type: ignore
328
+ else:
329
+ bs = io.BytesIO(bs) # type: ignore
330
+ with (
331
+ open(fs, "w", encoding=encoding) as fw,
332
+ FileReader(bs, encoding="detect") as fr,
333
+ ):
334
+ fw.writelines(fr.readlines())
335
+ except (IOError, lmdb.Error) as e:
336
+ self._warning("fails to convert of %s, err: %s", kk, e)
337
+
338
+ env_c.close()
339
+
340
+ # Downloader
341
+
342
+ def _download_metadata(
343
+ self,
344
+ path: str,
345
+ *,
346
+ pages: Optional[Iterable[int]] = None,
347
+ page_size: int = API_PAGE_SIZE_MAX,
348
+ **kwargs,
349
+ ) -> List[int]:
350
+ r"""Download all neuron metadata.
351
+
352
+ Parameters
353
+ ----------
354
+ path : str
355
+ Path to save data.
356
+ pages : list of int, optional
357
+ If is None, download all pages.
358
+ verbose : bool, default False
359
+ Show verbose log.
360
+ **kwargs :
361
+ Forwarding to `get`.
362
+
363
+ Returns
364
+ -------
365
+ err_pages : list of int
366
+ Failed pages.
367
+ """
368
+
369
+ # TODO: how to cache between versions?
370
+ import lmdb
371
+
372
+ env = lmdb.Environment(path, map_size=SIZE_METADATA)
373
+ if pages is None:
374
+ res = self._get_metadata(page=0, page_size=1, **kwargs)
375
+ total = res["page"]["totalElements"]
376
+ pages = range(math.ceil(total / page_size))
377
+
378
+ err_pages = []
379
+ for page in tqdm(pages) if self.verbose else pages:
380
+ try:
381
+ res = self._get_metadata(page, page_size=page_size, **kwargs)
382
+ with env.begin(write=True) as tx:
383
+ for neuron in res["_embedded"]["neuronResources"]:
384
+ k = str(neuron["neuron_id"]).encode("utf-8")
385
+ v = json.dumps(neuron).encode("utf-8")
386
+ tx.put(key=k, value=v)
387
+ except IOError as e:
388
+ err_pages.append(page)
389
+ self._warning("fails to get metadata of page %s, err: %s", page, e)
390
+
391
+ env.close()
392
+ return err_pages
393
+
394
+ # pylint: disable-next=too-many-locals
395
+ def _download_files(
396
+ self,
397
+ url: str,
398
+ path: str,
399
+ path_metadata: str,
400
+ *,
401
+ keys: Optional[Iterable[bytes]] = None,
402
+ override: bool = False,
403
+ map_size: int = 512 * GB,
404
+ **kwargs,
405
+ ) -> List[bytes]:
406
+ """Download files.
407
+
408
+ Parameters
409
+ ----------
410
+ url : str
411
+ path : str
412
+ Path to save data.
413
+ path_metadata : str
414
+ Path to lmdb of metadata.
415
+ keys : list of bytes, optional
416
+ If exist, ignore `override` option. If None, download all key.
417
+ override : bool, default False
418
+ Override even exists.
419
+ map_size : int, default 512GB
420
+ **kwargs :
421
+ Forwarding to `get`.
422
+
423
+ Returns
424
+ -------
425
+ err_keys : list of str
426
+ Failed keys.
427
+ """
428
+
429
+ import lmdb
430
+
431
+ env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
432
+ env_c = lmdb.Environment(path, map_size=map_size)
433
+ if keys is None:
434
+ with env_m.begin() as tx_m:
435
+ if override:
436
+ keys = [k for k, v in tx_m.cursor()]
226
437
  else:
227
- bs = io.BytesIO(bs) # type: ignore
228
- with (
229
- open(fs, "w", encoding=encoding) as fw,
230
- FileReader(bs, encoding="detect") as fr,
231
- ):
232
- fw.writelines(fr.readlines())
233
- except Exception as e: # pylint: disable=broad-exception-caught
234
- logging.warning("fails to convert of %s, err: %s", kk, e)
235
-
236
- env_c.close()
237
-
238
-
239
- def download_neuromorpho(
240
- path: str, *, retry: int = 3, verbose: bool = False, **kwargs
241
- ) -> None:
242
- kwargs.setdefault("verbose", verbose)
243
-
244
- path_m = os.path.join(path, "metadata")
245
- path_c = os.path.join(path, "cng_version")
438
+ with env_c.begin() as tx:
439
+ keys = [k for k, v in tx_m.cursor() if tx.get(k) is None]
246
440
 
247
- err_pages = download_metadata(path_m, **kwargs)
248
- for i in range(retry):
249
- if len(err_pages) == 0:
250
- break
251
-
252
- log = print if verbose else logging.info
253
- log("retry %d of download metadata: %s", i, json.dumps(err_pages))
254
- err_pages = download_metadata(path_m, pages=err_pages, **kwargs)
255
-
256
- if len(err_pages) != 0:
257
- logging.warning(
258
- "download metadata pages failed after %d retry: %s",
259
- retry,
260
- json.dumps(err_pages),
261
- )
441
+ err_keys = []
442
+ for k in tqdm(keys) if self.verbose else keys:
443
+ try:
444
+ with env_m.begin() as tx:
445
+ metadata = json.loads(tx.get(k).decode("utf-8")) # type: ignore
446
+
447
+ swc = self._get_file(url, metadata, **kwargs)
448
+ with env_c.begin(write=True) as tx:
449
+ tx.put(key=k, value=swc)
450
+ except IOError as e:
451
+ err_keys.append(k)
452
+ self._warning(
453
+ "fails to get morphology file `%s`, err: %s", k.decode("utf-8"), e
454
+ )
262
455
 
263
- err_keys = download_cng_version(path_c, path_m, **kwargs)
264
- for i in range(retry):
265
- if len(err_keys) == 0:
266
- break
267
-
268
- err_keys_str = json.dumps([i.decode("utf-8") for i in err_keys])
269
- logging.info("retry %d download CNG version: %d", i, err_keys_str)
270
- if verbose:
271
- print(f"retry {i} download CNG version: {err_keys_str}")
272
- err_keys = download_cng_version(path_c, path_m, keys=err_keys, **kwargs)
273
-
274
- if len(err_keys) != 0:
275
- err_keys_str = json.dumps([i.decode("utf-8") for i in err_keys])
276
- logging.warning(
277
- "download CNG version failed after %d retry: %s", retry, err_keys_str
456
+ env_m.close()
457
+ env_c.close()
458
+ return err_keys
459
+
460
+ def _get_metadata(
461
+ self, page: int, page_size: int = API_PAGE_SIZE_MAX, **kwargs
462
+ ) -> Dict[str, Any]:
463
+ params = {
464
+ "page": page,
465
+ "size": page_size,
466
+ "sort": "neuron_id,neuron_id,asc",
467
+ }
468
+ query = "&".join([f"{k}={v}" for k, v in params.items()])
469
+ url = f"{URL_METADATA}?{query}"
470
+ resp = self._get(url, **kwargs)
471
+ return json.loads(resp)
472
+
473
+ def _get_file(self, url: str, metadata: Dict[str, Any], **kwargs) -> bytes:
474
+ """Get file.
475
+
476
+ Returns
477
+ -------
478
+ bs : bytes
479
+ Bytes of morphology file, encoding is NOT FIXED.
480
+ """
481
+
482
+ archive = urllib.parse.quote(metadata["archive"].lower())
483
+ neuron = urllib.parse.quote(metadata["neuron_name"])
484
+ ext = self._guess_ext(metadata)
485
+ url = (
486
+ url.replace("$ARCHIVE", archive)
487
+ .replace("$NEURON", neuron)
488
+ .replace("$EXT", ext)
278
489
  )
490
+ return self._get(url, **kwargs)
491
+
492
+ def _get(
493
+ self, url: str, *, timeout: int = 2 * 60, proxy: Optional[str] = None
494
+ ) -> bytes:
495
+ if not url.startswith("http://") and not url.startswith("https://"):
496
+ url = urllib.parse.urljoin(self.url_base, url)
497
+
498
+ proxies = None
499
+ if proxy is not None:
500
+ proxies = {"http": proxy, "https": proxy}
501
+
502
+ response = self._session().get(url, timeout=timeout, proxies=proxies)
503
+ response.raise_for_status()
504
+ return response.content
505
+
506
+ def _session(self) -> Any:
507
+ if hasattr(self, "session"):
508
+ return self.session
509
+
510
+ import requests
511
+ import requests.adapters
512
+ import urllib3
513
+ import urllib3.util
514
+
515
+ class CustomSSLContextHTTPAdapter(requests.adapters.HTTPAdapter):
516
+ def __init__(self, ssl_context=None, **kwargs):
517
+ self.ssl_context = ssl_context
518
+ super().__init__(**kwargs)
519
+
520
+ def init_poolmanager(self, connections, maxsize, block=False):
521
+ super().init_poolmanager(
522
+ connections, maxsize, block, ssl_context=self.ssl_context
523
+ )
279
524
 
525
+ def proxy_manager_for(self, proxy, **proxy_kwargs):
526
+ return super().proxy_manager_for(
527
+ proxy, **proxy_kwargs, ssl_context=self.ssl_context
528
+ )
280
529
 
281
- def download_metadata(
282
- path: str, *, pages: Optional[Iterable[int]] = None, verbose: bool = False, **kwargs
283
- ) -> List[int]:
284
- """Download all neuron metadata.
285
-
286
- Parameters
287
- ----------
288
- path : str
289
- Path to save data.
290
- pages : list of int, optional
291
- If is None, download all pages.
292
- verbose : bool, default False
293
- Show verbose log.
294
- **kwargs :
295
- Forwarding to `get`.
296
-
297
- Returns
298
- -------
299
- err_pages : list of int
300
- Failed pages.
301
- """
302
- # TODO: how to cache between versions?
303
- import lmdb
304
- from tqdm import tqdm
305
-
306
- env = lmdb.Environment(path, map_size=SIZE_METADATA)
307
- page_size = API_NEURON_MAX_SIZE
308
- if pages is None:
309
- res = get_metadata(page=0, page_size=1, **kwargs)
310
- total = res["page"]["totalElements"]
311
- pages = range(math.ceil(total / page_size))
312
-
313
- err_pages = []
314
- for page in tqdm(pages) if verbose else pages:
315
- try:
316
- res = get_metadata(page, page_size=page_size, **kwargs)
317
- with env.begin(write=True) as tx:
318
- for neuron in res["_embedded"]["neuronResources"]:
319
- k = str(neuron["neuron_id"]).encode("utf-8")
320
- v = json.dumps(neuron).encode("utf-8")
321
- tx.put(key=k, value=v)
322
- except Exception as e: # pylint: disable=broad-exception-caught
323
- err_pages.append(page)
324
- logging.warning("fails to get metadata of page %s, err: %s", page, e)
325
-
326
- env.close()
327
- return err_pages
328
-
329
-
330
- # pylint: disable-next=too-many-locals
331
- def download_cng_version(
332
- path: str,
333
- path_metadata: str,
334
- *,
335
- keys: Optional[Iterable[bytes]] = None,
336
- override: bool = False,
337
- verbose: bool = False,
338
- **kwargs,
339
- ) -> List[bytes]:
340
- """Download GNG version swc.
341
-
342
- Parameters
343
- ----------
344
- path : str
345
- Path to save data.
346
- path_metadata : str
347
- Path to lmdb of metadata.
348
- keys : list of bytes, optional
349
- If exist, ignore `override` option. If None, download all key.
350
- override : bool, default False
351
- Override even exists.
352
- verbose : bool, default False
353
- Show verbose log.
354
- **kwargs :
355
- Forwarding to `get`.
356
-
357
- Returns
358
- -------
359
- err_keys : list of str
360
- Failed keys.
361
- """
362
- import lmdb
363
- from tqdm import tqdm
364
-
365
- env_m = lmdb.Environment(path_metadata, map_size=SIZE_METADATA, readonly=True)
366
- env_c = lmdb.Environment(path, map_size=SIZE_DATA)
367
- if keys is None:
368
- with env_m.begin() as tx_m:
369
- if override:
370
- keys = [k for k, v in tx_m.cursor()]
371
- else:
372
- with env_c.begin() as tx:
373
- keys = [k for k, v in tx_m.cursor() if tx.get(k) is None]
374
-
375
- err_keys = []
376
- for k in tqdm(keys) if verbose else keys:
377
- try:
378
- with env_m.begin() as tx:
379
- metadata = json.loads(tx.get(k).decode("utf-8")) # type: ignore
380
-
381
- swc = get_cng_version(metadata, **kwargs)
382
- with env_c.begin(write=True) as tx:
383
- tx.put(key=k, value=swc)
384
- except Exception as e: # pylint: disable=broad-exception-caught
385
- err_keys.append(k)
386
- logging.warning(
387
- "fails to get cng version of '%s', err: %s", k.decode("utf-8"), e
388
- )
389
-
390
- env_m.close()
391
- env_c.close()
392
- return err_keys
393
-
394
-
395
- def get_metadata(
396
- page, page_size: int = API_NEURON_MAX_SIZE, **kwargs
397
- ) -> Dict[str, Any]:
398
- params = {
399
- "page": page,
400
- "size": page_size,
401
- "sort": "neuron_id,neuron_id,asc",
402
- }
403
- query = "&".join([f"{k}={v}" for k, v in params.items()])
404
- url = f"{URL_NEURON}?{query}"
405
-
406
- s = get(url, **kwargs)
407
- return json.loads(s)
408
-
530
+ ctx = urllib3.util.create_urllib3_context()
531
+ ctx.load_default_certs()
532
+ ctx.set_ciphers("DEFAULT@SECLEVEL=1")
409
533
 
410
- def get_cng_version(metadata: Dict[str, Any], **kwargs) -> bytes:
411
- """Get CNG version swc.
534
+ session = requests.session()
535
+ session.adapters.pop("https://", None)
536
+ session.mount("https://", CustomSSLContextHTTPAdapter(ssl_context=ctx))
412
537
 
413
- Returns
414
- -------
415
- bs : bytes
416
- SWC bytes, encoding is NOT FIXED.
417
- """
418
- archive = urllib.parse.quote(metadata["archive"].lower())
419
- neuron = urllib.parse.quote(metadata["neuron_name"])
420
- url = URL_CNG_VERSION.replace("$ARCHIVE", archive).replace("$NEURON", neuron)
421
- return get(url, **kwargs)
538
+ self.session = session
539
+ return session
422
540
 
541
+ # format
542
+ def _guess_ext(self, metadata) -> str:
543
+ match metadata["original_format"]:
544
+ case "Custom.xml":
545
+ return "morph.xml"
423
546
 
424
- def get(url: str, *, timeout: int = 2 * 60, proxy: Optional[str] = None) -> bytes:
425
- # pylint: disable=c-extension-no-member
426
- import certifi
427
- import pycurl
547
+ case _:
548
+ _, ext = os.path.splitext(metadata["original_format"])
549
+ return ext[1:]
428
550
 
429
- buffer = io.BytesIO()
430
- c = pycurl.Curl()
431
- c.setopt(pycurl.URL, url)
432
- c.setopt(pycurl.WRITEDATA, buffer)
433
- c.setopt(pycurl.CAINFO, certifi.where())
434
- c.setopt(pycurl.TIMEOUT, timeout)
435
- if proxy is not None:
436
- c.setopt(pycurl.PROXY, proxy)
437
- c.perform()
551
+ # log helper
438
552
 
439
- code = c.getinfo(pycurl.RESPONSE_CODE)
440
- if code != 200:
441
- raise ConnectionError(f"fails to fetch data, status: {code}")
553
+ def _info(self, msg: str, *arg):
554
+ logging.info(msg, *arg, stacklevel=2)
555
+ if self.verbose:
556
+ print(msg % arg)
442
557
 
443
- c.close()
444
- return buffer.getvalue()
558
+ def _warning(self, msg: str, *arg):
559
+ logging.warning(msg, *arg, stacklevel=2)
560
+ if self.verbose:
561
+ print(msg % arg)
445
562
 
446
563
 
447
564
  if __name__ == "__main__":
@@ -451,6 +568,14 @@ if __name__ == "__main__":
451
568
  sub = subparsers.add_parser("download")
452
569
  sub.add_argument("-o", "--path", type=str)
453
570
  sub.add_argument("--retry", type=int, default=3)
571
+ sub.add_argument("--metadata", action=argparse.BooleanOptionalAction, default=True)
572
+ sub.add_argument(
573
+ "--resources",
574
+ type=str,
575
+ nargs="*",
576
+ default=["morpho_cng"],
577
+ choices=["morpho_cng", "morpho_source", "log_cng", "log_source"],
578
+ )
454
579
  sub.add_argument("--proxy", type=str, default=None)
455
580
  sub.add_argument("--verbose", type=bool, default=True)
456
581
  sub.set_defaults(func=download_neuromorpho)