sparclclient 1.2.0b3.dev8__py3-none-any.whl → 1.2.0b4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- sparcl/Results.py +1 -1
- sparcl/__init__.py +2 -2
- sparcl/big_retrieve.py +22 -0
- sparcl/client.py +71 -7
- sparcl/dls_376.py +25 -0
- sparcl/fields.py +2 -1
- sparcl/gather_2d.py +116 -59
- sparcl/resample_spectra.py +36 -0
- sparcl/utils.py +20 -1
- {sparclclient-1.2.0b3.dev8.dist-info → sparclclient-1.2.0b4.dist-info}/METADATA +2 -2
- sparclclient-1.2.0b4.dist-info/RECORD +20 -0
- sparclclient-1.2.0b3.dev8.dist-info/RECORD +0 -17
- {sparclclient-1.2.0b3.dev8.dist-info → sparclclient-1.2.0b4.dist-info}/LICENSE +0 -0
- {sparclclient-1.2.0b3.dev8.dist-info → sparclclient-1.2.0b4.dist-info}/WHEEL +0 -0
- {sparclclient-1.2.0b3.dev8.dist-info → sparclclient-1.2.0b4.dist-info}/top_level.txt +0 -0
sparcl/Results.py
CHANGED
sparcl/__init__.py
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
# List of packages to import when "from sparcl import *" is used
|
|
2
|
-
__all__ = ["client"]
|
|
2
|
+
__all__ = ["client", "align_records"]
|
|
3
3
|
|
|
4
4
|
|
|
5
5
|
# See semantic versioning
|
|
@@ -31,4 +31,4 @@ __all__ = ["client"]
|
|
|
31
31
|
#__version__ = '1.1rc1'
|
|
32
32
|
#__version__ = '1.1rc2'
|
|
33
33
|
#__version__ = '1.1'
|
|
34
|
-
__version__ = '1.2.0b3.dev8'
|
|
34
|
+
__version__ = '1.2.0b4'
|
sparcl/big_retrieve.py
ADDED
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
from sparcl.client import SparclClient
|
|
2
|
+
|
|
3
|
+
def tt(num=100):
|
|
4
|
+
print(f'Asking for {num:,d} records.')
|
|
5
|
+
client = SparclClient(url='https://sparc1.datalab.noirlab.edu',
|
|
6
|
+
verbose=True)
|
|
7
|
+
print(f'Client={client}')
|
|
8
|
+
# Client=(sparclclient:1.2.0b3.dev3,
|
|
9
|
+
# api:8.0,
|
|
10
|
+
# https://sparc1.datalab.noirlab.edu/sparc,
|
|
11
|
+
# verbose=True, connect_timeout=1.1, read_timeout=5400)
|
|
12
|
+
|
|
13
|
+
print(f'RUN client.find() for up to {num:,d} records.')
|
|
14
|
+
found = client.find(outfields=['id'], limit=num)
|
|
15
|
+
|
|
16
|
+
print(f'Found {found.count:,d} records.')
|
|
17
|
+
#! inc = ['id', 'data_release', 'flux', 'wavelength', 'spectype']
|
|
18
|
+
print('RUN client.retrieve()')
|
|
19
|
+
res = client.retrieve(uuid_list=found.ids, limit=num)
|
|
20
|
+
|
|
21
|
+
print(f'Retrieved {res.count:,d} records.')
|
|
22
|
+
return found
|
sparcl/client.py
CHANGED
|
@@ -6,7 +6,6 @@ This module interfaces to the SPARC-Server to get spectra data.
|
|
|
6
6
|
# Doctest example:
|
|
7
7
|
# cd ~/sandbox/sparclclient
|
|
8
8
|
# activate
|
|
9
|
-
# pip install -e .
|
|
10
9
|
# python sparcl/client.py
|
|
11
10
|
# ## Returns NOTHING if everything works, else lists errors.
|
|
12
11
|
|
|
@@ -131,7 +130,7 @@ class SparclClient(): # was SparclApi()
|
|
|
131
130
|
|
|
132
131
|
"""
|
|
133
132
|
|
|
134
|
-
KNOWN_GOOD_API_VERSION =
|
|
133
|
+
KNOWN_GOOD_API_VERSION = 9.0 # @@@ Change this on Server version increment
|
|
135
134
|
|
|
136
135
|
def __init__(self, *,
|
|
137
136
|
url=_PROD,
|
|
@@ -338,7 +337,8 @@ class SparclClient(): # was SparclApi()
|
|
|
338
337
|
constraints={}, # dict(fname) = [op, param, ...]
|
|
339
338
|
#dataset_list=None,
|
|
340
339
|
limit=500,
|
|
341
|
-
sort=None):
|
|
340
|
+
sort=None,
|
|
341
|
+
verbose=None):
|
|
342
342
|
"""Find records in the SPARC database.
|
|
343
343
|
|
|
344
344
|
Args:
|
|
@@ -361,6 +361,9 @@ class SparclClient(): # was SparclApi()
|
|
|
361
361
|
sort (:obj:`list`, optional): Comma separated list of fields
|
|
362
362
|
to sort by. Defaults to None. (no sorting)
|
|
363
363
|
|
|
364
|
+
verbose (:obj:`bool`, optional): Set to True for in-depth return
|
|
365
|
+
statement. Defaults to False.
|
|
366
|
+
|
|
364
367
|
Returns:
|
|
365
368
|
:class:`~sparcl.Results.Found`: Contains header and records.
|
|
366
369
|
|
|
@@ -377,6 +380,8 @@ class SparclClient(): # was SparclApi()
|
|
|
377
380
|
# will find records in all data sets hosted on the SPARC
|
|
378
381
|
# database.
|
|
379
382
|
|
|
383
|
+
verbose = self.verbose if verbose is None else verbose
|
|
384
|
+
|
|
380
385
|
# Let "outfields" default to ['id']; but fld may have been renamed
|
|
381
386
|
if outfields is None:
|
|
382
387
|
dslist = list(self.fields.all_datasets)
|
|
@@ -389,7 +394,8 @@ class SparclClient(): # was SparclApi()
|
|
|
389
394
|
raise ex.NoCommonIdField(msg)
|
|
390
395
|
outfields = [idfld]
|
|
391
396
|
dataset_list = self.fields.all_drs
|
|
392
|
-
self._validate_science_fields(outfields,
|
|
397
|
+
#! self._validate_science_fields(outfields,
|
|
398
|
+
#! dataset_list=dataset_list) # DLS-401
|
|
393
399
|
dr = list(dataset_list)[0]
|
|
394
400
|
if len(constraints) > 0:
|
|
395
401
|
self._validate_science_fields(constraints.keys(),
|
|
@@ -401,17 +407,23 @@ class SparclClient(): # was SparclApi()
|
|
|
401
407
|
uparams['sort'] = sort
|
|
402
408
|
qstr = urlencode(uparams)
|
|
403
409
|
url = f'{self.apiurl}/find/?{qstr}'
|
|
410
|
+
|
|
404
411
|
outfields = [self.fields._internal_name(s, dr) for s in outfields]
|
|
405
412
|
search = [[k] + v for k, v in constraints.items()]
|
|
406
413
|
sspec = dict(outfields=outfields, search=search)
|
|
414
|
+
if verbose:
|
|
415
|
+
print(f'url={url} sspec={sspec}')
|
|
407
416
|
res = requests.post(url, json=sspec, timeout=self.timeout)
|
|
408
417
|
|
|
409
418
|
if res.status_code != 200:
|
|
410
|
-
if
|
|
419
|
+
if verbose and ('traceback' in res.json()):
|
|
411
420
|
print(f'DBG: Server traceback=\n{res.json()["traceback"]}')
|
|
412
421
|
raise ex.genSparclException(res, verbose=self.verbose)
|
|
413
422
|
|
|
414
|
-
|
|
423
|
+
found = Found(res.json(), client=self)
|
|
424
|
+
if verbose:
|
|
425
|
+
print(f'Record key counts: {ut.count_values(found.records)}')
|
|
426
|
+
return found
|
|
415
427
|
|
|
416
428
|
def missing(self, uuid_list, *, dataset_list=None,
|
|
417
429
|
countOnly=False, verbose=False):
|
|
@@ -464,6 +476,56 @@ class SparclClient(): # was SparclApi()
|
|
|
464
476
|
return ret
|
|
465
477
|
# END missing()
|
|
466
478
|
|
|
479
|
+
def missing_specids(self, specid_list, *, dataset_list=None,
|
|
480
|
+
countOnly=False, verbose=False):
|
|
481
|
+
"""Return the subset of specids in the given specid_list that are
|
|
482
|
+
NOT stored in the SPARC database.
|
|
483
|
+
|
|
484
|
+
Args:
|
|
485
|
+
specid_list (:obj:`list`): List of specids.
|
|
486
|
+
|
|
487
|
+
dataset_list (:obj:`list`, optional): List of data sets from
|
|
488
|
+
which to find missing specids. Defaults to None, meaning
|
|
489
|
+
all data sets hosted on the SPARC database.
|
|
490
|
+
|
|
491
|
+
countOnly (:obj:`bool`, optional): Set to True to return only
|
|
492
|
+
a count of the missing specids from the specid_list.
|
|
493
|
+
Defaults to False.
|
|
494
|
+
|
|
495
|
+
verbose (:obj:`bool`, optional): Set to True for in-depth return
|
|
496
|
+
statement. Defaults to False.
|
|
497
|
+
|
|
498
|
+
Returns:
|
|
499
|
+
A list of the subset of specids in the given specid_list that
|
|
500
|
+
are NOT stored in the SPARC database.
|
|
501
|
+
|
|
502
|
+
Example:
|
|
503
|
+
>>> client = SparclClient(url=_PAT)
|
|
504
|
+
>>> specids = ['7972592460248666112', '3663710814482833408']
|
|
505
|
+
>>> client.missing_specids(specids + ['bad_id'])
|
|
506
|
+
['bad_id']
|
|
507
|
+
"""
|
|
508
|
+
if dataset_list is None:
|
|
509
|
+
dataset_list = self.fields.all_drs
|
|
510
|
+
assert isinstance(dataset_list, (list, set)), (
|
|
511
|
+
f'DATASET_LIST must be a list. Found {dataset_list}')
|
|
512
|
+
|
|
513
|
+
verbose = verbose or self.verbose
|
|
514
|
+
uparams = dict(dataset_list=','.join(dataset_list))
|
|
515
|
+
qstr = urlencode(uparams)
|
|
516
|
+
url = f'{self.apiurl}/missing_specids/?{qstr}'
|
|
517
|
+
specids = list(specid_list)
|
|
518
|
+
if verbose:
|
|
519
|
+
print(f'Using url="{url}"')
|
|
520
|
+
res = requests.post(url, json=specids, timeout=self.timeout)
|
|
521
|
+
|
|
522
|
+
res.raise_for_status()
|
|
523
|
+
if res.status_code != 200:
|
|
524
|
+
raise Exception(res)
|
|
525
|
+
ret = res.json()
|
|
526
|
+
return ret
|
|
527
|
+
# END missing_specids()
|
|
528
|
+
|
|
467
529
|
# Include fields are Science (not internal) names. But the mapping
|
|
468
530
|
# of Internal to Science name depends on DataSet. Its possible
|
|
469
531
|
# for a field (Science name) to be valid in one DataSet but not
|
|
@@ -656,6 +718,7 @@ class SparclClient(): # was SparclApi()
|
|
|
656
718
|
format='pkl', # 'json',
|
|
657
719
|
include='DEFAULT',
|
|
658
720
|
dataset_list=None,
|
|
721
|
+
limit=500,
|
|
659
722
|
verbose=False):
|
|
660
723
|
"""Retrieve spectra records from the SPARC database by list of specids.
|
|
661
724
|
|
|
@@ -705,7 +768,7 @@ class SparclClient(): # was SparclApi()
|
|
|
705
768
|
dr = list(self.fields.all_drs)[0]
|
|
706
769
|
idfld = self.fields._science_name('id', dr)
|
|
707
770
|
|
|
708
|
-
found = self.find([idfld], constraints=constraints)
|
|
771
|
+
found = self.find([idfld], constraints=constraints, limit=limit)
|
|
709
772
|
if verbose:
|
|
710
773
|
print(f'Found {found.count} matches.')
|
|
711
774
|
res = self.retrieve(found.ids,
|
|
@@ -713,6 +776,7 @@ class SparclClient(): # was SparclApi()
|
|
|
713
776
|
format=format,
|
|
714
777
|
include=include,
|
|
715
778
|
dataset_list=dataset_list,
|
|
779
|
+
limit=limit,
|
|
716
780
|
verbose=verbose)
|
|
717
781
|
if verbose:
|
|
718
782
|
print(f'Got {res.count} records.')
|
sparcl/dls_376.py
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
import sparcl.client
|
|
2
|
+
MAXR = sparcl.client.MAX_NUM_RECORDS_RETRIEVED
|
|
3
|
+
server = 'https://sparc1.datalab.noirlab.edu'
|
|
4
|
+
|
|
5
|
+
for num in [100, MAXR+1, MAXR]:
|
|
6
|
+
print('####################################################')
|
|
7
|
+
print(f'### Asking for {num:,d} records. Max allowed = {MAXR:,d}')
|
|
8
|
+
|
|
9
|
+
client = sparcl.client.SparclClient(url=server, verbose=True)
|
|
10
|
+
print(f'Client={client}')
|
|
11
|
+
|
|
12
|
+
print(f'RUN client.find() for up to {num:,d} records.')
|
|
13
|
+
found = client.find(outfields=['id'], limit=num)
|
|
14
|
+
print(f'Found {found.count:,d} records.')
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
print(f'RUN client.retrieve({found.count}, limit={num}) # DEFAULT include')
|
|
18
|
+
res = client.retrieve(uuid_list=found.ids, limit=num)
|
|
19
|
+
print(f'Retrieved {res.count:,d} records.')
|
|
20
|
+
print()
|
|
21
|
+
except Exception as err:
|
|
22
|
+
msg = f'Failed retrieve: {err}'
|
|
23
|
+
print(msg)
|
|
24
|
+
print()
|
|
25
|
+
continue
|
sparcl/fields.py
CHANGED
|
@@ -86,7 +86,8 @@ class Fields(): # Derived from a single query
|
|
|
86
86
|
return self.o2n[dataset].get(internal_name)
|
|
87
87
|
|
|
88
88
|
def _internal_name(self, science_name, dataset):
|
|
89
|
-
return self.n2o[dataset][science_name]
|
|
89
|
+
#!return self.n2o[dataset][science_name]
|
|
90
|
+
return self.n2o[dataset].get(science_name)
|
|
90
91
|
|
|
91
92
|
def filter_fields(self, attr, dataset_list):
|
|
92
93
|
fields = set()
|
sparcl/gather_2d.py
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
1
|
+
"""Align or resample spectra related fields across multiple records."""
|
|
2
|
+
# See client.py for Doctest example
|
|
3
|
+
#
|
|
3
4
|
# For info about problems with floating point,
|
|
4
5
|
# See: https://docs.python.org/3/tutorial/floatingpoint.html
|
|
5
6
|
# Also: https://docs.python.org/3/library/decimal.html#floating-point-notes
|
|
@@ -7,41 +8,11 @@
|
|
|
7
8
|
import math
|
|
8
9
|
from decimal import Decimal
|
|
9
10
|
#
|
|
10
|
-
import spectres
|
|
11
11
|
import numpy as np
|
|
12
12
|
#
|
|
13
13
|
import sparcl.client
|
|
14
14
|
|
|
15
15
|
|
|
16
|
-
# Per paper, should be able to pass all flux in one call to spectres
|
|
17
|
-
# https://arxiv.org/pdf/1705.05165.pdf
|
|
18
|
-
# Perhaps users would rather the bins uniform (1,5,20 Angstroms?)
|
|
19
|
-
def resample_flux(records, wavstep=1):
|
|
20
|
-
smallest = math.floor(min([min(r.wavelength) for r in records]))
|
|
21
|
-
largest = math.ceil(max([max(r.wavelength) for r in records]))
|
|
22
|
-
|
|
23
|
-
#!wrange = largest - smallest
|
|
24
|
-
#new_wavs = np.fromfunction(lambda i: i + smallest, (wrange,), dtype=int)
|
|
25
|
-
#flux_2d = np.ones([len(records), wrange])
|
|
26
|
-
|
|
27
|
-
new_wavs = np.array(range(smallest, largest + 1, wavstep))
|
|
28
|
-
flux_2d = np.full([len(records), len(new_wavs)], None, dtype=float)
|
|
29
|
-
|
|
30
|
-
for idx, rec in enumerate(records):
|
|
31
|
-
flux_2d[idx] = spectres.spectres(new_wavs,
|
|
32
|
-
rec.wavelength,
|
|
33
|
-
rec.flux,
|
|
34
|
-
verbose=False)
|
|
35
|
-
return flux_2d, new_wavs
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
def tt0(numrecs=20):
|
|
39
|
-
client = sparcl.client.SparclClient()
|
|
40
|
-
found = client.find(constraints=dict(data_release=['BOSS-DR16']),
|
|
41
|
-
limit=numrecs)
|
|
42
|
-
got = client.retrieve(found.ids)
|
|
43
|
-
flux_2d, new_wavs = resample_flux(got.records)
|
|
44
|
-
return flux_2d, new_wavs
|
|
45
16
|
|
|
46
17
|
|
|
47
18
|
# Map every wavelength of every record to index (ri,wi)
|
|
@@ -62,7 +33,7 @@ def tt0(numrecs=20):
|
|
|
62
33
|
#! return ar
|
|
63
34
|
|
|
64
35
|
|
|
65
|
-
def wavelength_offsets(records):
|
|
36
|
+
def _wavelength_offsets(records):
|
|
66
37
|
# sorted list of wavelengths from ALL records
|
|
67
38
|
window = sorted(
|
|
68
39
|
set(records[0].wavelength).union(*[r.wavelength for r in records[1:]]))
|
|
@@ -72,7 +43,7 @@ def wavelength_offsets(records):
|
|
|
72
43
|
return(window, offsets)
|
|
73
44
|
|
|
74
45
|
|
|
75
|
-
def validate_wavelength_alignment(records, window, offsets, precision=None):
|
|
46
|
+
def _validate_wavelength_alignment(records, window, offsets, precision=None):
|
|
76
47
|
PLACES = Decimal(10) ** -precision if precision is not None else None
|
|
77
48
|
#! print(f'DBG: PLACES={PLACES}')
|
|
78
49
|
# Given an exact wavelength match between first wl (wavelength) in a rec
|
|
@@ -85,14 +56,17 @@ def validate_wavelength_alignment(records, window, offsets, precision=None):
|
|
|
85
56
|
else:
|
|
86
57
|
recwl = Decimal(rwl).quantize(PLACES)
|
|
87
58
|
wwl = window[offsets[ri] + wi]
|
|
88
|
-
msg = (f'Wavelength in '
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
assert recwl == wwl, msg
|
|
94
|
-
|
|
95
|
-
|
|
59
|
+
#! msg = (f'Wavelength in '
|
|
60
|
+
#! f'Record[{ri}][{wi}] ({recwl}) does not match '
|
|
61
|
+
#! f'Window[{offsets[ri]+wi} = offset[{ri}]={offsets[ri]} '
|
|
62
|
+
#! f'+ {wi}] ({wwl})'
|
|
63
|
+
#! )
|
|
64
|
+
#! assert recwl == wwl, msg
|
|
65
|
+
if recwl != wwl:
|
|
66
|
+
msg = (f'The spectra cannot be aligned with the given'
|
|
67
|
+
f' "precision" parameter ({precision}).'
|
|
68
|
+
f' Try lowering the precision value.')
|
|
69
|
+
raise Exception(msg)
|
|
96
70
|
|
|
97
71
|
|
|
98
72
|
# We want to align a bunch of records by wavelength into a single
|
|
@@ -100,9 +74,9 @@ def validate_wavelength_alignment(records, window, offsets, precision=None):
|
|
|
100
74
|
# are not guaranteed that this is possible -- even if using only
|
|
101
75
|
# records from a single DataSet. So validate it first.
|
|
102
76
|
# (If not valid, allowing wavelength slop might help.)
|
|
103
|
-
def align_wavelengths(records):
|
|
77
|
+
def _align_wavelengths(records):
|
|
104
78
|
window, offsets = wavelength_offsets(records)
|
|
105
|
-
|
|
79
|
+
_validate_wavelength_alignment(records, window, offsets)
|
|
106
80
|
ar = np.ones([len(records), len(window)])
|
|
107
81
|
for ri, r in enumerate(records):
|
|
108
82
|
for wi, wl in enumerate(r.wavelength):
|
|
@@ -110,7 +84,7 @@ def align_wavelengths(records):
|
|
|
110
84
|
return ar
|
|
111
85
|
|
|
112
86
|
|
|
113
|
-
def tt1(numrecs=20, dr='BOSS-DR16'):
|
|
87
|
+
def _tt1(numrecs=20, dr='BOSS-DR16'):
|
|
114
88
|
client = sparcl.client.SparclClient()
|
|
115
89
|
found = client.find(constraints=dict(data_release=[dr]),
|
|
116
90
|
limit=numrecs)
|
|
@@ -119,12 +93,13 @@ def tt1(numrecs=20, dr='BOSS-DR16'):
|
|
|
119
93
|
window, offsets = wavelength_offsets(records)
|
|
120
94
|
print(f'Built window len={len(window)}; offsets={offsets}')
|
|
121
95
|
#return records, window, offsets
|
|
122
|
-
ar = align_wavelengths(records)
|
|
96
|
+
ar = _align_wavelengths(records)
|
|
123
97
|
return ar
|
|
124
98
|
|
|
125
99
|
|
|
126
100
|
# precision:: number of decimal places
|
|
127
|
-
def wavelength_grid_offsets(records, precision=11):
|
101
|
+
# "records" must contain "wavelength" field.
|
|
102
|
+
def _wavelength_grid_offsets(records, precision=11):
|
|
128
103
|
PLACES = Decimal(10) ** -precision
|
|
129
104
|
|
|
130
105
|
# set of wavelengths from ALL records. Quantized to precision
|
|
@@ -141,22 +116,100 @@ def wavelength_grid_offsets(records, precision=11):
|
|
|
141
116
|
|
|
142
117
|
# return 2D numpy array of FLUX values that is aligned to wavelength GRID.
|
|
143
118
|
# GRID is generally wider than flux for single record. Pad with NaN.
|
|
144
|
-
def
|
|
145
|
-
|
|
119
|
+
def _flux_grid(records, grid, offsets, precision=None):
|
|
120
|
+
_validate_wavelength_alignment(records, grid, offsets, precision=precision)
|
|
146
121
|
ar = np.full([len(records), len(grid)], np.nan)
|
|
147
122
|
for ri, r in enumerate(records):
|
|
148
123
|
for fi, flux in enumerate(r.flux):
|
|
149
124
|
ar[ri, offsets[ri] + fi] = flux
|
|
150
125
|
return ar
|
|
151
126
|
|
|
152
|
-
|
|
153
|
-
def flux_records(records, precision=None):
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
def tt(numrecs=9, dr='BOSS-DR16', precision=7):
|
127
|
+
# RETURN 2D nparray(records,wavelengthGrid) = fieldValue
|
|
128
|
+
def _field_grid(records, fieldName, grid, offsets, precision=None):
|
|
129
|
+
ar = np.full([len(records), len(grid)], np.nan)
|
|
130
|
+
for ri, r in enumerate(records):
|
|
131
|
+
for fi, fieldValue in enumerate(r[fieldName]):
|
|
132
|
+
ar[ri, offsets[ri] + fi] = fieldValue
|
|
133
|
+
return ar # (wavelengthGrid, records)
|
|
134
|
+
|
|
135
|
+
# RETURN 2D nparray(fields,wavelengthGrid) = fieldValue
|
|
136
|
+
#! def rec_grid(rec, fields, grid, offsets, precision=None):
|
|
137
|
+
#! ar = np.full([len(fields), len(grid)], np.nan)
|
|
138
|
+
#! ri = 0
|
|
139
|
+
#! for fi, fieldValue in enumerate(r[fieldName]):
|
|
140
|
+
#! ar[ri, offsets[ri] + fi] = fieldValue
|
|
141
|
+
#! return ar # (wavelengthGrid, fields)
|
|
142
|
+
|
|
143
|
+
|
|
144
|
+
# Align flux from records into one array using quantization
|
|
145
|
+
#! def flux_records(records, precision=None):
|
|
146
|
+
#! grid, offsets = wavelength_grid_offsets(records, precision=precision)
|
|
147
|
+
#! ar = _flux_grid(records, grid, offsets, precision=precision)
|
|
148
|
+
#! return ar, np.array([float(x) for x in grid])
|
|
149
|
+
|
|
150
|
+
def _validate_spectra_fields(records, fields):
|
|
151
|
+
spectra_fields = [client.fields.n2o['BOSS-DR16'][k] for k,v in client.fields.attrs['BOSS-DR16'].items() if v['storage']=='S']
|
|
152
|
+
[k for k in records[0].keys() if not k.startswith('_')]
|
|
153
|
+
|
|
154
|
+
|
|
155
|
+
# TOP level: Intended for access from Jupyter NOTEBOOK.
|
|
156
|
+
# Align spectra related field from records into one array using quantization.
|
|
157
|
+
def align_records(records, fields=['flux','wavelength'], precision=7):
|
|
158
|
+
"""Align given spectra-type fields to a common wavelength grid.
|
|
159
|
+
|
|
160
|
+
Args:
|
|
161
|
+
records (list): List of dictionaries. The keys for all these dictionaries
|
|
162
|
+
are Science Field Names.
|
|
163
|
+
|
|
164
|
+
fields (:obj:`list`, optional): List of Science Field Names of
|
|
165
|
+
spectra related fields to align and include in the results.
|
|
166
|
+
DEFAULT=['flux', 'wavelength']
|
|
167
|
+
|
|
168
|
+
precision (:obj:`int`, optional): Number of decimal points to use for
|
|
169
|
+
quantizing wavelengths into a grid.
|
|
170
|
+
DEFAULT=7
|
|
171
|
+
|
|
172
|
+
Returns:
|
|
173
|
+
tuple containing:
|
|
174
|
+
- ar_dict(dict): Dictionary of 2D numpy arrays keyed by Field Name.
|
|
175
|
+
Each array is shape: (numRecs, numzGridWavelengths)
|
|
176
|
+
- grid(ndarray): 1D numpy array containing wavelength values.
|
|
177
|
+
|
|
178
|
+
Example:
|
|
179
|
+
>>> client = sparcl.client.SparclClient()
|
|
180
|
+
>>> specflds = ['wavelength', 'model']
|
|
181
|
+
>>> cons = {"data_release": ['BOSS-DR16']}
|
|
182
|
+
>>> found = client.find(constraints=cons, limit=21)
|
|
183
|
+
>>> got = client.retrieve(found.ids, include=specflds)
|
|
184
|
+
>>> ar_dict, grid = align_records(got.records, fields=specflds)
|
|
185
|
+
>>> ar_dict['model'].shape
|
|
186
|
+
(21, 4670)
|
|
187
|
+
|
|
188
|
+
"""
|
|
189
|
+
# Report Garbage In
|
|
190
|
+
if 'wavelength' not in fields:
|
|
191
|
+
msg = (f'You must provide "wavelength" in the list provided'
|
|
192
|
+
f' in the "fields" paramter. Got: {fields}')
|
|
193
|
+
raise Exception(msg)
|
|
194
|
+
if 'wavelength' not in records[0]:
|
|
195
|
+
msg = (f'Records must contain the "wavelength" field.'
|
|
196
|
+
f' The first record contains fields: {sorted(records[0].keys())}')
|
|
197
|
+
raise Exception(msg)
|
|
198
|
+
|
|
199
|
+
#! _validate_spectra_fields(records, fields)
|
|
200
|
+
grid, offsets = _wavelength_grid_offsets(records, precision=precision)
|
|
201
|
+
_validate_wavelength_alignment(records, grid, offsets, precision=precision)
|
|
202
|
+
|
|
203
|
+
# One slice for each field; each slice a 2darray(wavelength, record)=fldVal
|
|
204
|
+
adict = dict()
|
|
205
|
+
for fld in fields:
|
|
206
|
+
ar = _field_grid(records, fld, grid, offsets, precision=None)
|
|
207
|
+
adict[fld] = ar
|
|
208
|
+
|
|
209
|
+
return adict, np.array([float(x) for x in grid])
|
|
210
|
+
|
|
211
|
+
|
|
212
|
+
def _tt(numrecs=9, dr='BOSS-DR16', precision=7):
|
|
160
213
|
# Get sample of NUMRECS records from DR DataSet.
|
|
161
214
|
client = sparcl.client.SparclClient()
|
|
162
215
|
found = client.find(constraints=dict(data_release=[dr]),
|
|
@@ -164,10 +217,14 @@ def tt(numrecs=9, dr='BOSS-DR16', precision=7):
|
|
|
164
217
|
got = client.retrieve(found.ids)
|
|
165
218
|
records = got.records
|
|
166
219
|
|
|
167
|
-
#! grid, offsets = wavelength_grid_offsets(records, precision=precision)
|
|
220
|
+
#! grid, offsets = _wavelength_grid_offsets(records, precision=precision)
|
|
168
221
|
#! print(f'Built grid len={len(grid)} '
|
|
169
222
|
#! f'offsets({len(offsets)})[:5]={list(offsets.values())[:5]}')
|
|
170
|
-
#! ar =
|
|
223
|
+
#! ar = _flux_grid(records, grid, offsets, precision=precision)
|
|
171
224
|
ar, grid = flux_records(records, precision=precision)
|
|
172
225
|
return ar, grid # ar (numRecs,len(grid))
|
|
173
226
|
# with np.printoptions(threshold=np.inf, linewidth=210, formatter=dict(float=lambda v: f'{v: > 7.3f}')): print(ar.T) # noqa: E501
|
|
227
|
+
|
|
228
|
+
if __name__ == "__main__":
|
|
229
|
+
import doctest
|
|
230
|
+
doctest.testmod()
|
sparcl/resample_spectra.py
ADDED
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# NOT INTENDED FOR PUBLIC USE!
|
|
2
|
+
#
|
|
3
|
+
# See:
|
|
4
|
+
# https://spectres.readthedocs.io/en/latest/
|
|
5
|
+
|
|
6
|
+
import spectres
|
|
7
|
+
|
|
8
|
+
# Per paper, should be able to pass all flux in one call to spectres
|
|
9
|
+
# https://arxiv.org/pdf/1705.05165.pdf
|
|
10
|
+
# Perhaps users would rather the bins uniform (1,5,20 Angstroms?)
|
|
11
|
+
def _resample_flux(records, wavstep=1):
|
|
12
|
+
smallest = math.floor(min([min(r.wavelength) for r in records]))
|
|
13
|
+
largest = math.ceil(max([max(r.wavelength) for r in records]))
|
|
14
|
+
|
|
15
|
+
#!wrange = largest - smallest
|
|
16
|
+
#new_wavs = np.fromfunction(lambda i: i + smallest, (wrange,), dtype=int)
|
|
17
|
+
#flux_2d = np.ones([len(records), wrange])
|
|
18
|
+
|
|
19
|
+
new_wavs = np.array(range(smallest, largest + 1, wavstep))
|
|
20
|
+
flux_2d = np.full([len(records), len(new_wavs)], None, dtype=float)
|
|
21
|
+
|
|
22
|
+
for idx, rec in enumerate(records):
|
|
23
|
+
flux_2d[idx] = spectres.spectres(new_wavs,
|
|
24
|
+
rec.wavelength,
|
|
25
|
+
rec.flux,
|
|
26
|
+
verbose=False)
|
|
27
|
+
return flux_2d, new_wavs
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _tt0(numrecs=20):
|
|
31
|
+
client = sparcl.client.SparclClient()
|
|
32
|
+
found = client.find(constraints=dict(data_release=['BOSS-DR16']),
|
|
33
|
+
limit=numrecs)
|
|
34
|
+
got = client.retrieve(found.ids)
|
|
35
|
+
flux_2d, new_wavs = _resample_flux(got.records)
|
|
36
|
+
return flux_2d, new_wavs
|
sparcl/utils.py
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
# Python library
|
|
2
|
-
#!import os
|
|
3
2
|
import datetime
|
|
4
3
|
import time
|
|
5
4
|
import socket
|
|
5
|
+
import itertools
|
|
6
6
|
# External packages
|
|
7
7
|
# none
|
|
8
8
|
# LOCAL packages
|
|
@@ -142,3 +142,22 @@ def dict2tree(obj, name=None, prefix=''):
|
|
|
142
142
|
def invLUT(lut):
|
|
143
143
|
"""Given dict[k]=v, Return dict[v]=k"""
|
|
144
144
|
return {v: k for k, v in lut.items()}
|
|
145
|
+
|
|
146
|
+
def count_values(recs):
|
|
147
|
+
"""Count number of non-None values in a list of dictionaries.
|
|
148
|
+
A key that exists with a value of None is treated the same as a
|
|
149
|
+
key that does not exist at all. i.e. It does not add to the count.
|
|
150
|
+
|
|
151
|
+
Args:
|
|
152
|
+
recs (:obj:`list`): ('records') List of dictionaries.
|
|
153
|
+
|
|
154
|
+
Returns:
|
|
155
|
+
A dictionary. Keys are the full list of keys available in any
|
|
156
|
+
of the recs. Values are the count of occurances of non-None values
|
|
157
|
+
for that key.
|
|
158
|
+
|
|
159
|
+
>>> count_values([dict(a=None, b=3), dict(a=1, b=2), dict(a=None, b=2)])
|
|
160
|
+
{'a': 1, 'b': 3}
|
|
161
|
+
"""
|
|
162
|
+
allkeys = set(list(itertools.chain(*recs)))
|
|
163
|
+
return {k: sum(x.get(k) is not None for x in recs) for k in allkeys}
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: sparclclient
|
|
3
|
-
Version: 1.2.0b3.dev8
|
|
3
|
+
Version: 1.2.0b4
|
|
4
4
|
Summary: A client for getting spectra data from NOIRLab.
|
|
5
5
|
Home-page: https://github.com/astro-datalab/sparclclient
|
|
6
6
|
Author: NOIRLab DataLab
|
|
@@ -15,7 +15,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
15
15
|
Requires-Python: >=3.6
|
|
16
16
|
Description-Content-Type: text/markdown
|
|
17
17
|
License-File: LICENSE
|
|
18
|
-
Requires-Dist: requests (==2.
|
|
18
|
+
Requires-Dist: requests (==2.31.0)
|
|
19
19
|
Requires-Dist: numpy
|
|
20
20
|
Requires-Dist: spectres
|
|
21
21
|
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
sparcl/Results.py,sha256=S4jQcXnw6qfdIEL-NEsCqMK6LolFaERGxjNWIvNhACM,8039
|
|
2
|
+
sparcl/__init__.py,sha256=sLkvMfJ715Hbp_Lo0vJSC0RRqDYMcvrlMXHbJBBce_M,1030
|
|
3
|
+
sparcl/big_retrieve.py,sha256=q0ScH87QqPL4bz4g0hB0AO3k4c_TiuQrWjBJHqHhE60,798
|
|
4
|
+
sparcl/client.py,sha256=2bmwXjLU28Apod7O7ohkwHAvcGRbURAolYq9xERj9HE,31749
|
|
5
|
+
sparcl/conf.py,sha256=O9l4-vpWBZK0QjhHxjskGO8kHPxBj7mkWlchd2rot1c,953
|
|
6
|
+
sparcl/dls_376.py,sha256=WvZjuZFRU0jgH3ELRrMQdslkMWiF2wFQrSag0cYii-I,887
|
|
7
|
+
sparcl/exceptions.py,sha256=q7ONsLsop9OQJJCD4SEzfdsojv0yo3WQT0SluaxGOQ0,3813
|
|
8
|
+
sparcl/fields.py,sha256=7MpaJQr2d1GktS7aeM4010jyLqDdKQ7BZIF9hM0IjII,5002
|
|
9
|
+
sparcl/gather_2d.py,sha256=ZRr41vNHV4tnf63-QuTu04SlWv6TOzK-CeHpbt9YwOY,9254
|
|
10
|
+
sparcl/resample_spectra.py,sha256=2MO-sDCCFg2eNiK6jQs2EJRu4bNnXycGV8WaOydssG4,1329
|
|
11
|
+
sparcl/type_conversion.py,sha256=RX7OD1iGuuUrf-yAd0ISdiqBq4CP7QlCw0vvkAdHdsQ,13112
|
|
12
|
+
sparcl/unsupported.py,sha256=vkSaK3Ppcxx6mMsqBktUjI0uS7RwBJYH2BkBABsnyIM,1867
|
|
13
|
+
sparcl/utils.py,sha256=YlLUP0j4thUyEwTJAaqJ7zzsvbCxPe5EYTn9kvWGfBY,4682
|
|
14
|
+
sparcl/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
15
|
+
sparcl/benchmarks/benchmarks.py,sha256=FPZ2KExfVWHhGt3B4VyfgOhxxsemj7OeBWJO0dyDDC4,9667
|
|
16
|
+
sparclclient-1.2.0b4.dist-info/LICENSE,sha256=y10EluGMCzGs9X4oYCYyix3l6u-lawB_vlGR8qe442Q,1576
|
|
17
|
+
sparclclient-1.2.0b4.dist-info/METADATA,sha256=NtJ980uIF8tOtRNULNS_Y4UlPDI9Gg3ToLzj31Hnyng,867
|
|
18
|
+
sparclclient-1.2.0b4.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
19
|
+
sparclclient-1.2.0b4.dist-info/top_level.txt,sha256=d5CZ3Duxq3MyQTB2ZqOrdtSBv4GdVceF-pOZFmsuHZY,7
|
|
20
|
+
sparclclient-1.2.0b4.dist-info/RECORD,,
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
sparcl/Results.py,sha256=yHetKpwujeqW2RXloo-_9d3JgTu11VLrwSZzVwqZcJU,8000
|
|
2
|
-
sparcl/__init__.py,sha256=4ioeTP4QY5qMXcaT0AQ35jv04F7pWu4mK9VuRVRYbkk,1018
|
|
3
|
-
sparcl/client.py,sha256=ZB6SKMech4vSbo3uywcBp3LB9-EV4Yr5R0E4dj13Lp4,29299
|
|
4
|
-
sparcl/conf.py,sha256=O9l4-vpWBZK0QjhHxjskGO8kHPxBj7mkWlchd2rot1c,953
|
|
5
|
-
sparcl/exceptions.py,sha256=q7ONsLsop9OQJJCD4SEzfdsojv0yo3WQT0SluaxGOQ0,3813
|
|
6
|
-
sparcl/fields.py,sha256=v_QpZ-n0otf1IZoFqjwJz5KUQNpgC9mTPzk_KNZ0qOc,4949
|
|
7
|
-
sparcl/gather_2d.py,sha256=6DbozntC1hUD7AHj4DEtXQWMQVdtGgv8GZkbqmNC44Y,6781
|
|
8
|
-
sparcl/type_conversion.py,sha256=RX7OD1iGuuUrf-yAd0ISdiqBq4CP7QlCw0vvkAdHdsQ,13112
|
|
9
|
-
sparcl/unsupported.py,sha256=vkSaK3Ppcxx6mMsqBktUjI0uS7RwBJYH2BkBABsnyIM,1867
|
|
10
|
-
sparcl/utils.py,sha256=ZcDdOiyKNWcByh67f96utDyCxn7HeB1S6CNkY7A6gJM,3965
|
|
11
|
-
sparcl/benchmarks/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
-
sparcl/benchmarks/benchmarks.py,sha256=FPZ2KExfVWHhGt3B4VyfgOhxxsemj7OeBWJO0dyDDC4,9667
|
|
13
|
-
sparclclient-1.2.0b3.dev8.dist-info/LICENSE,sha256=y10EluGMCzGs9X4oYCYyix3l6u-lawB_vlGR8qe442Q,1576
|
|
14
|
-
sparclclient-1.2.0b3.dev8.dist-info/METADATA,sha256=zmA_92V_C_CKhaKbg0aVRMVV5WnEQf6AkRET80TMH4Q,872
|
|
15
|
-
sparclclient-1.2.0b3.dev8.dist-info/WHEEL,sha256=pkctZYzUS4AYVn6dJ-7367OJZivF2e8RA9b_ZBjif18,92
|
|
16
|
-
sparclclient-1.2.0b3.dev8.dist-info/top_level.txt,sha256=d5CZ3Duxq3MyQTB2ZqOrdtSBv4GdVceF-pOZFmsuHZY,7
|
|
17
|
-
sparclclient-1.2.0b3.dev8.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|