sparclclient 1.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparcl/client.py ADDED
@@ -0,0 +1,869 @@
1
+ """Client module for SPARCL.
2
+ This module interfaces to the SPARC-Server to get spectra data.
3
+ """
4
+ # python -m unittest tests.tests_api
5
+
6
+ # ### Run tests against DEV
7
+ # serverurl=http://localhost:8050 python -m unittest tests.tests_api
8
+ #
9
+ # ### Run tests Against PAT Server.
10
+ # export serverurl=https://sparc1.datalab.noirlab.edu/
11
+ # python -m unittest tests.tests_api
12
+
13
+ #
14
+ # Doctest example:
15
+ # cd ~/sandbox/sparclclient
16
+ # activate
17
+ # python sparcl/client.py
18
+ # ## Returns NOTHING if everything works, else lists errors.
19
+
20
+ ############################################
21
+ # Python Standard Library
22
+ from urllib.parse import urlencode, urlparse
23
+ from warnings import warn
24
+ import pickle
25
+
26
+ #!from pathlib import Path
27
+ import tempfile
28
+
29
+ ############################################
30
+ # External Packages
31
+ import requests
32
+
33
+ ############################################
34
+ # Local Packages
35
+ from sparcl.fields import Fields
36
+ import sparcl.utils as ut
37
+ import sparcl.exceptions as ex
38
+
39
+ #!import sparcl.type_conversion as tc
40
+ from sparcl import __version__
41
+ from sparcl.Results import Found, Retrieved
42
+
43
+
44
+ MAX_CONNECT_TIMEOUT = 3.1 # seconds
45
+ MAX_READ_TIMEOUT = 150 * 60 # seconds
46
+ MAX_NUM_RECORDS_RETRIEVED = int(24e3) # Minimum Hard Limit = 25,000
47
+ #!MAX_NUM_RECORDS_RETRIEVED = int(5e4) #@@@ Reduce !!!
48
+
49
+
50
+ _pat_hosts = [
51
+ "sparc1.datalab.noirlab.edu",
52
+ "sparc2.datalab.noirlab.edu",
53
+ "astrosparcl.datalab.noirlab.edu",
54
+ ]
55
+
56
+ # Upload to PyPi:
57
+ # python3 -m build --wheel
58
+ # twine upload dist/*
59
+
60
+ # Use Google Style Python Docstrings so autogen of Sphinx doc works:
61
+ # https://www.sphinx-doc.org/en/master/usage/extensions/example_google.html
62
+ # https://sphinxcontrib-napoleon.readthedocs.io/en/latest/example_google.html
63
+ #
64
+ # Use sphinx-doc emacs minor mode to insert docstring skeleton.
65
+ # C-c M-d in function/method def
66
+
67
+ # ### Generate documentation:
68
+ # cd ~/sandbox/sparclclient
69
+ # sphinx-apidoc -f -o source sparcl
70
+ # make html
71
+ # firefox -new-tab "`pwd`/build/html/index.html"
72
+
73
+ # Using HTTPie (http://httpie.org):
74
+ # http :8030/sparc/version
75
+
76
+ # specids = [394069118933821440, 1355741587413428224, 1355617892355303424,
77
+ # 1355615143576233984, 1355661872820414464, 1355755331308775424,
78
+ # 1355716848401803264]
79
+ # client = client.SparclClient(url='http://localhost:8030/sparc')
80
+ # client.retrieve(specids)[0].keys() # >> dict_keys(['flux','loglam'])
81
+ #
82
+ # data0 = client.retrieve(specids,columns='flux')
83
+ # f'{len(str(data0)):,}' # -> '3,435,687'
84
+ #
85
+ # dataall = client.retrieve(specids,columns=allc)
86
+ # f'{len(str(dataall)):,}' # -> '27,470,052'
87
+
88
+ _PROD = "https://astrosparcl.datalab.noirlab.edu" # noqa: E221
89
+ _STAGE = "https://sparclstage.datalab.noirlab.edu" # noqa: E221
90
+ _PAT = "https://sparc1.datalab.noirlab.edu" # noqa: E221
91
+ _DEV = "http://localhost:8050" # noqa: E221
92
+
93
+
94
+ # client_version = pkg_resources.require("sparclclient")[0].version
95
+ client_version = __version__
96
+
97
+ DEFAULT = "DEFAULT"
98
+ ALL = "ALL"
99
+ RESERVED = set([DEFAULT, ALL])
100
+
101
+
102
+ ###########################
103
+ # ## Convenience Functions
104
+
105
+ # Following can be done with:
106
+ # set.intersection(*sets)
107
+ #
108
+ #!def intersection(*lists):
109
+ #! """Return intersection of all LISTS."""
110
+ #! return set(lists[0]).intersection(*lists[1:])
111
+
112
+
113
+
114
+ ###########################
115
+ # ## The Client class
116
+
117
+
118
+ class SparclClient: # was SparclApi()
119
+ """Provides interface to SPARCL Server.
120
+ When using this to report a bug, set verbose to True. Also print
121
+ your instance of this. The results will include important info
122
+ about the Client and Server that is usefule to Developers.
123
+
124
+ Args:
125
+ url (:obj:`str`, optional): Base URL of SPARCL Server. Defaults
126
+ to 'https://astrosparcl.datalab.noirlab.edu'.
127
+
128
+ verbose (:obj:`bool`, optional): Default verbosity is set to
129
+ False for all client methods.
130
+
131
+ connect_timeout (:obj:`float`, optional): Number of seconds to
132
+ wait to establish connection with server. Defaults to
133
+ 1.1.
134
+
135
+ read_timeout (:obj:`float`, optional): Number of seconds to
136
+ wait for server to send a response. Generally time to
137
+ wait for first byte. Defaults to 5400.
138
+
139
+ Example:
140
+ >>> client = SparclClient()
141
+
142
+ Raises:
143
+ Exception: Object creation compares the version from the
144
+ Server against the one expected by the Client. Throws an
145
+ error if the Client is a major version or more behind.
146
+
147
+ """
148
+
149
+ KNOWN_GOOD_API_VERSION = 11.0 # @@@ Change when Server version incremented
150
+
151
    def __init__(
        self,
        *,
        email=None,
        password=None,
        url=_PROD,
        verbose=False,
        show_curl=False,
        connect_timeout=1.1,  # seconds
        read_timeout=90 * 60,  # seconds
    ):
        """Create client instance.

        Connects to the server at URL, fetches the server API version,
        and raises if this client is a full major version (or more)
        behind the server.
        """
        # Keep one Session so later requests can reuse the credentials.
        session = requests.Session()
        self.session = session

        # Basic-auth is enabled only when BOTH email and password are given.
        self.session.auth = (email, password) if email and password else None
        self.rooturl = url.rstrip("/")  # eg. "http://localhost:8050"
        self.apiurl = f"{self.rooturl}/sparc"
        self.apiversion = None
        self.verbose = verbose
        self.show_curl = show_curl  # Show CURL equivalent of client method
        #!self.internal_names = internal_names
        # Clamp user-supplied timeouts to the module-level hard maximums.
        self.c_timeout = min(
            MAX_CONNECT_TIMEOUT, float(connect_timeout)
        )  # seconds
        self.r_timeout = min(MAX_READ_TIMEOUT, float(read_timeout))  # seconds

        # require response within this num seconds
        # https://2.python-requests.org/en/master/user/advanced/#timeouts
        # (connect timeout, read timeout) in seconds
        self.timeout = (self.c_timeout, self.r_timeout)
        # @@@ read timeout should be a function of the POST payload size

        if verbose:
            print(f"apiurl={self.apiurl}")

        # Get API Version
        try:
            endpoint = f"{self.apiurl}/version/"
            verstr = requests.get(endpoint, timeout=self.timeout).content
        except requests.ConnectionError as err:
            msg = f"Could not connect to {endpoint}. {str(err)}"
            # PAT hosts are reachable only over VPN; hint at the likely cause.
            if urlparse(url).hostname in _pat_hosts:
                msg += "Did you enable VPN?"
            raise ex.ServerConnectionError(msg) from None  # disable chaining

        self.apiversion = float(verstr)

        # Compare integer (major) parts only: a >=1 major-version gap
        # between client expectation and server is fatal.
        expected_api = SparclClient.KNOWN_GOOD_API_VERSION
        if (int(self.apiversion) - int(expected_api)) >= 1:
            msg = (
                f"The SPARCL Client you are running expects an older "
                f"version of the API services. "
                f'Please upgrade to the latest "sparclclient". '
                f"The Client you are using expected version "
                f"{SparclClient.KNOWN_GOOD_API_VERSION} but got "
                f"{self.apiversion} from the SPARCL Server "
                f"at {self.apiurl}."
            )
            raise Exception(msg)
        # self.session = requests.Session() #@@@

        self.clientversion = client_version
        # Field-name metadata (science vs internal names) from the server.
        self.fields = Fields(self.apiurl)

        ###
        ####################################################
        # END __init__()
219
+
220
+ def __repr__(self):
221
+ #!f' internal_names={self.internal_names},'
222
+ return (
223
+ f"(sparclclient:{self.clientversion},"
224
+ f" api:{self.apiversion},"
225
+ f" {self.apiurl},"
226
+ f" client_hash={ut.githash()},"
227
+ f" verbose={self.verbose},"
228
+ f" connect_timeout={self.c_timeout},"
229
+ f" read_timeout={self.r_timeout})"
230
+ )
231
+
232
+ @property
233
+ def all_datasets(self):
234
+ """Set of all DataSets available from Server"""
235
+ return self.fields.all_drs
236
+
237
+ def get_default_fields(self, *, dataset_list=None):
238
+ """Get fields tagged as 'default' that are in DATASET_LIST.
239
+ These are the fields used for the DEFAULT value of the include
240
+ parameter of client.retrieve().
241
+
242
+ Args:
243
+ dataset_list (:obj:`list`, optional): List of data sets from
244
+ which to get the default fields. Defaults to None, which
245
+ will return the intersection of default fields in all
246
+ data sets hosted on the SPARCL database.
247
+
248
+ Returns:
249
+ List of fields tagged as 'default' from DATASET_LIST.
250
+
251
+ Example:
252
+ >>> client = SparclClient()
253
+ >>> client.get_default_fields()
254
+ ['dec', 'flux', 'ra', 'sparcl_id', 'specid', 'wavelength']
255
+ """
256
+
257
+ if dataset_list is None:
258
+ dataset_list = self.fields.all_drs
259
+
260
+ assert isinstance(
261
+ dataset_list, (list, set)
262
+ ), f"DATASET_LIST must be a list. Found {dataset_list}"
263
+
264
+ common = set(self.fields.common(dataset_list))
265
+ union = self.fields.default_retrieve_fields(dataset_list=dataset_list)
266
+ return sorted(common.intersection(union))
267
+
268
+ def get_all_fields(self, *, dataset_list=None):
269
+ """Get fields tagged as 'all' that are in DATASET_LIST.
270
+ These are the fields used for the ALL value of the include parameter
271
+ of client.retrieve().
272
+
273
+ Args:
274
+ dataset_list (:obj:`list`, optional): List of data sets from
275
+ which to get all fields. Defaults to None, which
276
+ will return the intersection of all fields in all
277
+ data sets hosted on the SPARCL database.
278
+
279
+ Returns:
280
+ List of fields tagged as 'all' from DATASET_LIST.
281
+
282
+ Example:
283
+ >>> client = SparclClient()
284
+ >>> client.get_all_fields()
285
+ ['data_release', 'datasetgroup', 'dateobs', 'dateobs_center', 'dec', 'exptime', 'flux', 'instrument', 'ivar', 'mask', 'model', 'ra', 'redshift', 'redshift_err', 'redshift_warning', 'site', 'sparcl_id', 'specid', 'specprimary', 'spectype', 'survey', 'targetid', 'telescope', 'wave_sigma', 'wavelength', 'wavemax', 'wavemin']
286
+ """ # noqa: E501
287
+
288
+ common = set(self.fields.common(dataset_list))
289
+ union = self.fields.all_retrieve_fields(dataset_list=dataset_list)
290
+ return sorted(common.intersection(union))
291
+
292
+ def _validate_science_fields(self, science_fields, *, dataset_list=None):
293
+ """Raise exception if any field name in SCIENCE_FIELDS is
294
+ not registered in at least one of DATASET_LIST."""
295
+ if dataset_list is None:
296
+ dataset_list = self.fields.all_drs
297
+ all = set(self.fields.common(dataset_list=dataset_list))
298
+ unk = set(science_fields) - all
299
+ if len(unk) > 0:
300
+ drs = self.fields.all_drs if dataset_list is None else dataset_list
301
+ msg = (
302
+ f'Unknown fields "{",".join(unk)}" given '
303
+ f'for DataSets {",".join(drs)}. '
304
+ f'Allowed fields are: {",".join(all)}. '
305
+ )
306
+ raise ex.UnknownField(msg)
307
+ return True
308
+
309
+ def _common_internal(self, *, science_fields=None, dataset_list=None):
310
+ self._validate_science_fields(
311
+ science_fields, dataset_list=dataset_list
312
+ )
313
+
314
+ if dataset_list is None:
315
+ dataset_list = self.fields.all_drs
316
+ if science_fields is None:
317
+ science_fields = self.fields.all_fields
318
+ common = self.fields.common_internal(dataset_list)
319
+ flds = set()
320
+ for dr in dataset_list:
321
+ for sn in science_fields:
322
+ flds.add(self.fields._internal_name(sn, dr))
323
+ return common.intersection(flds)
324
+
325
+ # Return Science Field Names (not Internal)
326
+ def get_available_fields(self, *, dataset_list=None):
327
+ """Get subset of fields that are in all (or selected) DATASET_LIST.
328
+ This may be a bigger list than will be used with the ALL keyword to
329
+ client.retreive().
330
+
331
+ Args:
332
+ dataset_list (:obj:`list`, optional): List of data sets from
333
+ which to get available fields. Defaults to None, which
334
+ will return the intersection of all available fields in
335
+ all data sets hosted on the SPARCL database.
336
+
337
+ Returns:
338
+ Set of fields available from data sets in DATASET_LIST.
339
+
340
+ Example:
341
+ >>> client = SparclClient()
342
+ >>> sorted(client.get_available_fields())
343
+ ['data_release', 'datasetgroup', 'dateobs', 'dateobs_center', 'dec', 'dirpath', 'exptime', 'extra_files', 'filename', 'filesize', 'flux', 'instrument', 'ivar', 'mask', 'model', 'ra', 'redshift', 'redshift_err', 'redshift_warning', 'site', 'sparcl_id', 'specid', 'specprimary', 'spectype', 'survey', 'targetid', 'telescope', 'updated', 'wave_sigma', 'wavelength', 'wavemax', 'wavemin']
344
+ """ # noqa: E501
345
+
346
+ drs = self.fields.all_drs if dataset_list is None else dataset_list
347
+ every = [set(self.fields.n2o[dr]) for dr in drs]
348
+ return set.intersection(*every)
349
+
350
+ @property
351
+ def version(self):
352
+ """Return version of Server Rest API used by this client.
353
+ If the Rest API changes such that the Major version increases,
354
+ a new version of this module will likely need to be used.
355
+
356
+ Returns:
357
+ API version (:obj:`float`).
358
+
359
+ Example:
360
+ >>> client = SparclClient()
361
+ >>> client.version
362
+ 9.0
363
+ """
364
+
365
+ if self.apiversion is None:
366
+ response = requests.get(
367
+ f"{self.apiurl}/version", timeout=self.timeout, cache=True
368
+ )
369
+ self.apiversion = float(response.content)
370
+ return self.apiversion
371
+
372
+ def find(
373
+ self,
374
+ outfields=None,
375
+ *,
376
+ constraints={}, # dict(fname) = [op, param, ...]
377
+ # dataset_list=None,
378
+ limit=500,
379
+ sort=None,
380
+ verbose=None,
381
+ ):
382
+ """Find records in the SPARCL database.
383
+
384
+ Args:
385
+ outfields (:obj:`list`, optional): List of fields to return.
386
+ Only CORE fields may be passed to this parameter.
387
+ Defaults to None, which will return only the sparcl_id
388
+ and _dr fields.
389
+
390
+ constraints (:obj:`dict`, optional): Key-Value pairs of
391
+ constraints to place on the record selection. The Key
392
+ part of the Key-Value pair is the field name and the
393
+ Value part of the Key-Value pair is a list of values.
394
+ Defaults to no constraints. This will return all records in the
395
+ database subject to restrictions imposed by the ``limit``
396
+ parameter.
397
+
398
+ limit (:obj:`int`, optional): Maximum number of records to
399
+ return. Defaults to 500.
400
+
401
+ sort (:obj:`list`, optional): Comma separated list of fields
402
+ to sort by. Defaults to None. (no sorting)
403
+
404
+ verbose (:obj:`bool`, optional): Set to True for in-depth return
405
+ statement. Defaults to False.
406
+
407
+ Returns:
408
+ :class:`~sparcl.Results.Found`: Contains header and records.
409
+
410
+ Example:
411
+ >>> client = SparclClient()
412
+ >>> outs = ['sparcl_id', 'ra', 'dec']
413
+ >>> cons = {'spectype': ['GALAXY'], 'redshift': [0.5, 0.9]}
414
+ >>> found = client.find(outfields=outs, constraints=cons)
415
+ >>> sorted(list(found.records[0].keys()))
416
+ ['_dr', 'dec', 'ra', 'sparcl_id']
417
+ """
418
+ # dataset_list (:obj:`list`, optional): List of data sets from
419
+ # which to find records. Defaults to None, which
420
+ # will find records in all data sets hosted on the SPARC
421
+ # database.
422
+
423
+ verbose = self.verbose if verbose is None else verbose
424
+
425
+ # Let "outfields" default to ['id']; but fld may have been renamed
426
+ if outfields is None:
427
+ outfields = ["sparcl_id"]
428
+ dataset_list = self.fields.all_drs
429
+ #! self._validate_science_fields(outfields,
430
+ #! dataset_list=dataset_list) # DLS-401
431
+ dr = list(dataset_list)[0]
432
+ if len(constraints) > 0:
433
+ self._validate_science_fields(
434
+ constraints.keys(), dataset_list=dataset_list
435
+ )
436
+ constraints = {
437
+ self.fields._internal_name(k, dr): v
438
+ for k, v in constraints.items()
439
+ }
440
+ uparams = dict(
441
+ limit=limit,
442
+ )
443
+ if sort is not None:
444
+ uparams["sort"] = sort
445
+ qstr = urlencode(uparams)
446
+ url = f"{self.apiurl}/find/?{qstr}"
447
+
448
+ outfields = [self.fields._internal_name(s, dr) for s in outfields]
449
+ search = [[k] + v for k, v in constraints.items()]
450
+ sspec = dict(outfields=outfields, search=search)
451
+
452
+ if verbose:
453
+ print(f"url={url} sspec={sspec}")
454
+ if self.show_curl:
455
+ cmd = ut.curl_find_str(sspec, self.rooturl, qstr=qstr)
456
+ print(cmd)
457
+
458
+ res = requests.post(url, json=sspec, timeout=self.timeout)
459
+
460
+ if res.status_code != 200:
461
+ if verbose and ("traceback" in res.json()):
462
+ print(f'DBG: Server traceback=\n{res.json()["traceback"]}')
463
+ raise ex.genSparclException(res, verbose=self.verbose)
464
+
465
+ found = Found(res.json(), client=self)
466
+ if verbose:
467
+ print(f"Record key counts: {ut.count_values(found.records)}")
468
+ return found
469
+
470
+ def missing(
471
+ self, uuid_list, *, dataset_list=None, countOnly=False, verbose=False
472
+ ):
473
+ """Return the subset of sparcl_ids in the given uuid_list that are
474
+ NOT stored in the SPARCL database.
475
+
476
+ Args:
477
+ uuid_list (:obj:`list`): List of sparcl_ids.
478
+
479
+ dataset_list (:obj:`list`, optional): List of data sets from
480
+ which to find missing sparcl_ids. Defaults to None, meaning
481
+ all data sets hosted on the SPARCL database.
482
+
483
+ countOnly (:obj:`bool`, optional): Set to True to return only
484
+ a count of the missing sparcl_ids from the uuid_list.
485
+ Defaults to False.
486
+
487
+ verbose (:obj:`bool`, optional): Set to True for in-depth return
488
+ statement. Defaults to False.
489
+
490
+ Returns:
491
+ A list of the subset of sparcl_ids in the given uuid_list that
492
+ are NOT stored in the SPARCL database.
493
+
494
+ Example:
495
+ >>> client = SparclClient()
496
+ >>> ids = ['ddbb57ee-8e90-4a0d-823b-0f5d97028076',]
497
+ >>> client.missing(ids)
498
+ ['ddbb57ee-8e90-4a0d-823b-0f5d97028076']
499
+ """
500
+
501
+ if dataset_list is None:
502
+ dataset_list = self.fields.all_drs
503
+ assert isinstance(
504
+ dataset_list, (list, set)
505
+ ), f"DATASET_LIST must be a list. Found {dataset_list}"
506
+
507
+ verbose = verbose or self.verbose
508
+ uparams = dict(dataset_list=",".join(dataset_list))
509
+ qstr = urlencode(uparams)
510
+ url = f"{self.apiurl}/missing/?{qstr}"
511
+ uuids = list(uuid_list)
512
+ if verbose:
513
+ print(f'Using url="{url}"')
514
+ res = requests.post(url, json=uuids, timeout=self.timeout)
515
+
516
+ res.raise_for_status()
517
+ if res.status_code != 200:
518
+ raise Exception(res)
519
+ ret = res.json()
520
+ return ret
521
+ # END missing()
522
+
523
+ def missing_specids(
524
+ self, specid_list, *, dataset_list=None, countOnly=False, verbose=False
525
+ ):
526
+ """Return the subset of specids in the given specid_list that are
527
+ NOT stored in the SPARCL database.
528
+
529
+ Args:
530
+ specid_list (:obj:`list`): List of specids.
531
+
532
+ dataset_list (:obj:`list`, optional): List of data sets from
533
+ which to find missing specids. Defaults to None, meaning
534
+ all data sets hosted on the SPARCL database.
535
+
536
+ countOnly (:obj:`bool`, optional): Set to True to return only
537
+ a count of the missing specids from the specid_list.
538
+ Defaults to False.
539
+
540
+ verbose (:obj:`bool`, optional): Set to True for in-depth return
541
+ statement. Defaults to False.
542
+
543
+ Returns:
544
+ A list of the subset of specids in the given specid_list that
545
+ are NOT stored in the SPARCL database.
546
+
547
+ Example:
548
+ >>> client = SparclClient(url=_PAT)
549
+ >>> specids = ['7972592460248666112', '3663710814482833408']
550
+ >>> client.missing_specids(specids + ['bad_id'])
551
+ ['bad_id']
552
+ """
553
+ if dataset_list is None:
554
+ dataset_list = self.fields.all_drs
555
+ assert isinstance(
556
+ dataset_list, (list, set)
557
+ ), f"DATASET_LIST must be a list. Found {dataset_list}"
558
+
559
+ verbose = verbose or self.verbose
560
+ uparams = dict(dataset_list=",".join(dataset_list))
561
+ qstr = urlencode(uparams)
562
+ url = f"{self.apiurl}/missing_specids/?{qstr}"
563
+ specids = list(specid_list)
564
+ if verbose:
565
+ print(f'Using url="{url}"')
566
+ res = requests.post(url, json=specids, timeout=self.timeout)
567
+
568
+ res.raise_for_status()
569
+ if res.status_code != 200:
570
+ raise Exception(res)
571
+ ret = res.json()
572
+ return ret
573
+ # END missing_specids()
574
+
575
+ # Include fields are Science (not internal) names. But the mapping
576
+ # of Internal to Science name depends on DataSet. Its possible
577
+ # for a field (Science name) to be valid in one DataSet but not
578
+ # another. For the include_list to be valid, all fields must be
579
+ # valid Science field names for all DS in given dataset_list.
580
+ # (defaults to all DataSets ingested)
581
+ def _validate_include(self, include_list, dataset_list):
582
+ if not isinstance(include_list, (list, set)):
583
+ msg = f"Bad INCLUDE_LIST. Must be list. Got {include_list}"
584
+ raise ex.BadInclude(msg)
585
+
586
+ avail_science = self.get_available_fields(dataset_list=dataset_list)
587
+ inc_set = set(include_list)
588
+ unknown = inc_set.difference(avail_science)
589
+ if len(unknown) > 0:
590
+ msg = (
591
+ f'The INCLUDE list ({",".join(sorted(include_list))}) '
592
+ f"contains invalid data field names "
593
+ f'for Data Sets ({",".join(sorted(dataset_list))}). '
594
+ f"Unknown fields are: "
595
+ f'{", ".join(sorted(list(unknown)))}. '
596
+ f"Available fields are: "
597
+ f'{", ".join(sorted(avail_science))}.'
598
+ )
599
+ raise ex.BadInclude(msg)
600
+ return True
601
+
602
    def retrieve(  # noqa: C901
        self,
        uuid_list,
        *,
        include="DEFAULT",
        dataset_list=None,
        limit=500,
        verbose=None,
    ):
        """Retrieve spectra records from the SPARCL database by list of
        sparcl_ids.

        Args:
            uuid_list (:obj:`list`): List of sparcl_ids.

            include (:obj:`list`, optional): List of field names to include
                in each record. Defaults to 'DEFAULT', which will return
                the fields tagged as 'default'.

            dataset_list (:obj:`list`, optional): List of data sets from
                which to retrieve spectra data. Defaults to None, meaning all
                data sets hosted on the SPARCL database.

            limit (:obj:`int`, optional): Maximum number of records to
                return. Defaults to 500. Maximum allowed is 24,000.

            verbose (:obj:`bool`, optional): Set to True for in-depth return
                statement. Defaults to False.

        Returns:
            :class:`~sparcl.Results.Retrieved`: Contains header and records.

        Example:
            >>> client = SparclClient()
            >>> ids = ['00000f0b-07db-4234-892a-6e347db79c89',]
            >>> inc = ['sparcl_id', 'flux', 'wavelength', 'model']
            >>> ret = client.retrieve(uuid_list=ids, include=inc)
            >>> type(ret.records[0].wavelength)
            <class 'numpy.ndarray'>
        """

        # Variants for async, etc.
        #
        # From "performance testing" docstring
        # svc (:obj:`str`, optional): Defaults to 'spectras'.
        #
        # format (:obj:`str`, optional): Defaults to 'pkl'.
        #
        #
        # chunk (:obj:`int`, optional): Size of chunks to break list into.
        #     Defaults to 500.
        #
        # These were keyword params; now fixed for the supported service.
        svc = "spectras"  # retrieve, spectras
        format = "pkl"  # 'json',
        chunk = 500

        if dataset_list is None:
            dataset_list = self.fields.all_drs
        assert isinstance(
            dataset_list, (list, set)
        ), f"DATASET_LIST must be a list. Found {dataset_list}"

        verbose = self.verbose if verbose is None else verbose

        # Expand the DEFAULT/ALL sentinels into concrete field lists;
        # anything else is treated as an explicit list of field names.
        if (include == DEFAULT) or (include is None) or include == []:
            include_list = self.get_default_fields(dataset_list=dataset_list)
        elif include == ALL:
            include_list = self.get_all_fields(dataset_list=dataset_list)
        else:
            include_list = include

        self._validate_include(include_list, dataset_list)

        # Effective request size: the smaller of the id count and limit
        # (a None/0 limit means "all ids given").
        req_num = min(len(uuid_list), (limit or len(uuid_list)))
        #! print(f'DBG: req_num = {req_num:,d}'
        #!       f' len(uuid_list)={len(uuid_list):,d}'
        #!       f' limit={limit}'
        #!       f' MAX_NUM_RECORDS_RETRIEVED={MAX_NUM_RECORDS_RETRIEVED:,d}')
        if req_num > MAX_NUM_RECORDS_RETRIEVED:
            msg = (
                f"Too many records asked for with client.retrieve()."
                f" {len(uuid_list):,d} IDs provided,"
                f" limit={limit}."
                f" But the maximum allowed is"
                f" {MAX_NUM_RECORDS_RETRIEVED:,d}."
            )
            raise ex.TooManyRecords(msg)

        # Convert the Science field names to the internal names common
        # to all selected data sets (what the server expects).
        com_include = self._common_internal(
            science_fields=include_list, dataset_list=dataset_list
        )
        uparams = {
            "include": ",".join(com_include),
            # limit=limit, # altered uuid_list to reflect limit
            #! "chunk_len": chunk,
            "format": format,
            #! "1thread": "yes", # @@@ 7.3.2023
            "dataset_list": ",".join(dataset_list),
        }
        qstr = urlencode(uparams)

        #!url = f'{self.apiurl}/retrieve/?{qstr}'
        url = f"{self.apiurl}/{svc}/?{qstr}"
        if verbose:
            print(f'Using url="{url}"')
            ut.tic()  # start wall-clock timer (read back via ut.toc below)

        # Apply the limit client-side by truncating the id list.
        ids = list(uuid_list) if limit is None else list(uuid_list)[:limit]
        if self.show_curl:
            cmd = ut.curl_retrieve_str(ids, self.rooturl, svc=svc, qstr=qstr)
            print(cmd)

        # Wrap every requests failure mode in a SPARCL exception so callers
        # only need to catch the sparcl.exceptions hierarchy.
        try:
            res = requests.post(
                url, json=ids, auth=self.session.auth, timeout=self.timeout
            )
        except requests.exceptions.ConnectTimeout as reCT:
            raise ex.UnknownSparcl(f"ConnectTimeout: {reCT}")
        except requests.exceptions.ReadTimeout as reRT:
            msg = (
                f'Try increasing the value of the "read_timeout" parameter'
                f' to "SparclClient()".'
                f" The current values is: {self.r_timeout} (seconds)"
                f"{reRT}"
            )
            raise ex.ReadTimeout(msg) from None
        except requests.exceptions.ConnectionError as reCE:
            raise ex.UnknownSparcl(f"ConnectionError: {reCE}")
        except requests.exceptions.TooManyRedirects as reTMR:
            raise ex.UnknownSparcl(f"TooManyRedirects: {reTMR}")
        except requests.exceptions.HTTPError as reHTTP:
            raise ex.UnknownSparcl(f"HTTPError: {reHTTP}")
        except requests.exceptions.URLRequired as reUR:
            raise ex.UnknownSparcl(f"URLRequired: {reUR}")
        except requests.exceptions.RequestException as reRE:
            raise ex.UnknownSparcl(f"RequestException: {reRE}")
        except Exception as err:  # fall through
            raise ex.UnknownSparcl(err)

        if verbose:
            elapsed = ut.toc()  # only defined when verbose (tic above)
            print(f"Got response to post in {elapsed} seconds")
        if res.status_code != 200:
            if verbose:
                print(f"DBG: Server response=\n{res.text}")
            # @@@ FAILS on invalid JSON. Maybe not json at all !!!
            if verbose and ("traceback" in res.json()):
                print(f'DBG: Server traceback=\n{res.json()["traceback"]}')
            raise ex.genSparclException(res, verbose=verbose)

        if format == "json":
            results = res.json()
        elif format == "pkl":
            # Read chunked binary file (representing pickle file) from
            # server response. Load pickle into python data structure.
            # Python structure is list of records where first element
            # is a header.
            with tempfile.TemporaryFile(mode="w+b") as fp:
                for idx, chunk in enumerate(res.iter_content(chunk_size=None)):
                    fp.write(chunk)
                # Position to start of file for pickle reading (load)
                fp.seek(0)
                # NOTE(review): pickle.load of server data assumes the
                # server is trusted; do not point this client at an
                # untrusted URL.
                results = pickle.load(fp)
        else:
            results = res.json()

        # First element is a header (metadata); the rest are records.
        meta = results[0]
        if verbose:
            count = len(results) - 1
            print(
                f"Got {count} spectra in "
                f"{elapsed:.2f} seconds ({count/elapsed:.0f} "
                "spectra/sec)"
            )
            print(f'{meta["status"]}')

        # Surface server-side warnings (e.g. ids not found) to the caller.
        if len(meta["status"].get("warnings", [])) > 0:
            warn(f"{'; '.join(meta['status'].get('warnings'))}", stacklevel=2)

        return Retrieved(results, client=self)
783
+
784
+ def retrieve_by_specid(
785
+ self,
786
+ specid_list,
787
+ *,
788
+ svc="spectras", # 'retrieve',
789
+ format="pkl", # 'json',
790
+ include="DEFAULT",
791
+ dataset_list=None,
792
+ limit=500,
793
+ verbose=False,
794
+ ):
795
+ """Retrieve spectra records from the SPARCL database by list of
796
+ specids.
797
+
798
+ Args:
799
+ specid_list (:obj:`list`): List of specids.
800
+
801
+ include (:obj:`list`, optional): List of field names to include
802
+ in each record. Defaults to 'DEFAULT', which will return
803
+ the fields tagged as 'default'.
804
+
805
+ dataset_list (:obj:`list`, optional): List of data sets from
806
+ which to retrieve spectra data. Defaults to None, meaning all
807
+ data sets hosted on the SPARCL database.
808
+
809
+ limit (:obj:`int`, optional): Maximum number of records to
810
+ return. Defaults to 500. Maximum allowed is 24,000.
811
+
812
+ verbose (:obj:`bool`, optional): Set to True for in-depth return
813
+ statement. Defaults to False.
814
+
815
+ Returns:
816
+ :class:`~sparcl.Results.Retrieved`: Contains header and records.
817
+
818
+ Example:
819
+ >>> client = SparclClient()
820
+ >>> sids = [5840097619402313728, -8985592895187431424]
821
+ >>> inc = ['specid', 'flux', 'wavelength', 'model']
822
+ >>> ret = client.retrieve_by_specid(specid_list=sids, include=inc)
823
+ >>> len(ret.records[0].wavelength)
824
+ 4617
825
+
826
+ """
827
+ #!specid_list = list(specid_list)
828
+ assert isinstance(specid_list, list), (
829
+ f'The "specid_list" parameter must be a python list. '
830
+ f"You used a value of type {type(specid_list)}."
831
+ )
832
+ assert (
833
+ len(specid_list) > 0
834
+ ), f'The "specid_list" parameter value must be a non-empty list'
835
+ assert isinstance(specid_list[0], int), (
836
+ f'The "specid_list" parameter must be a python list of INTEGERS. '
837
+ f"You used an element value of type {type(specid_list[0])}."
838
+ )
839
+
840
+ if dataset_list is None:
841
+ constraints = {"specid": specid_list}
842
+ else:
843
+ constraints = {"specid": specid_list, "data_release": dataset_list}
844
+
845
+ # Science Field Name for uuid.
846
+ dr = list(self.fields.all_drs)[0]
847
+ idfld = self.fields._science_name("sparcl_id", dr)
848
+
849
+ found = self.find([idfld], constraints=constraints, limit=limit)
850
+ if verbose:
851
+ print(f"Found {found.count} matches.")
852
+ res = self.retrieve(
853
+ found.ids,
854
+ #! svc=svc,
855
+ #! format=format,
856
+ include=include,
857
+ dataset_list=dataset_list,
858
+ limit=limit,
859
+ verbose=verbose,
860
+ )
861
+ if verbose:
862
+ print(f"Got {res.count} records.")
863
+ return res
864
+
865
+
866
if __name__ == "__main__":
    # Run the docstring examples as doctests (requires network access to
    # the SPARCL server). Prints NOTHING when everything passes.
    import doctest

    doctest.testmod()