sparclclient 1.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparcl/conf.py ADDED
@@ -0,0 +1,34 @@
+ # Python Standard Library
+ import configparser
+ import os.path
+
+
+ class Conf:
+     """
+     Configuration parameters for `sparclclient`.
+     """
+
+     def __init__(self, conf_file=None):
+         config = configparser.ConfigParser()
+         conf_files = ["~/sparc.ini", "sparcl/sparc.ini"]
+         if conf_file is None:
+             for cf in conf_files:
+                 if os.path.exists(os.path.expanduser(cf)):
+                     config.read(os.path.expanduser(cf))
+         else:
+             # Read an explicitly supplied conf file.
+             config.read(os.path.expanduser(conf_file))
+
+         # The properties below read the "sparc.server" section, so that
+         # is the section required here.
+         if "sparc.server" not in config:
+             raise Exception(
+                 f"Could not find conf file in any of: "
+                 f'{",".join(conf_files)}. '
+                 f"Create one and try again."
+             )
+
+         self.config = config
+
+     @property
+     def server_baseurl(self):
+         return self.config["sparc.server"]["ServerBaseUrl"]
+
+     @property
+     def server_timeout(self):
+         # NOTE: the key name "ServerTimout" (sic) must match the ini file.
+         return self.config["sparc.server"]["ServerTimout"]
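For reference, a minimal sparc.ini that satisfies Conf might look like the sketch below; the section and key names come from the properties above, while the values are placeholders:

    [sparc.server]
    ServerBaseUrl = https://example.org/sparc
    ServerTimout = 120

With such a file at ~/sparc.ini, Conf().server_baseurl returns the configured URL and Conf().server_timeout returns the (string) timeout value.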
sparcl/exceptions.py ADDED
@@ -0,0 +1,141 @@
+ import traceback
+
+
+ def genSparclException(response, verbose=False):
+     """Given the status from Server response.json(), which is a dict,
+     generate a native SPARCL exception suitable for Science programs."""
+
+     content = response.content
+     if verbose:
+         print(f"Exception: response content={content}")
+     status = response.json()
+
+     # As of Python 3.10.0.alpha6, the python "match" statement could be used
+     # instead of if-elif-else.
+     # https://docs.python.org/3.10/whatsnew/3.10.html#pep-634-structural-pattern-matching
+     if status.get("errorCode") == "BADPATH":
+         return BadPath(status.get("errorMessage"))
+     elif status.get("errorCode") == "BADQUERY":
+         return BadQuery(status.get("errorMessage"))
+     elif status.get("errorCode") == "UNKFIELD":
+         return UnknownField(status.get("errorMessage"))
+     elif status.get("errorCode") == "BADCONST":
+         return BadSearchConstraint(status.get("errorMessage"))
+     else:
+         return UnknownServerError(
+             f"{status.get('errorMessage')} [{status.get('errorCode')}]"
+         )
+
+
+ class BaseSparclException(Exception):
+     """Base class for all SPARCL exceptions."""
+
+     error_code = "UNKNOWN"
+     error_message = "<NA>"
+     traceback = None
+
+     def get_subclass_name(self):
+         return self.__class__.__name__
+
+     def __init__(self, error_message, error_code=None):
+         Exception.__init__(self)
+         self.error_message = error_message
+         if error_code:
+             self.error_code = error_code
+         self.traceback = traceback.format_exc()
+
+     def __str__(self):
+         return f"[{self.error_code}] {self.error_message}"
+
+     def to_dict(self):
+         """Convert a SPARCL exception to a python dictionary"""
+         dd = dict(errorMessage=self.error_message, errorCode=self.error_code)
+         if self.traceback is not None:
+             dd["traceback"] = self.traceback
+         return dd
+
+
+ class BadPath(BaseSparclException):
+     """A field path starts with a non-core field."""
+
+     error_code = "BADPATH"
+
+
+ class BadQuery(BaseSparclException):
+     """Bad find constraints."""
+
+     # Matches the server code dispatched in genSparclException.
+     error_code = "BADQUERY"
+
+
+ class BadInclude(BaseSparclException):
+     """Include list contains invalid data field(s)."""
+
+     error_code = "BADINCL"
+
+
+ class UnknownServerError(BaseSparclException):
+     """Client got a status response from the SPARC Server that we do not
+     know how to decode."""
+
+     error_code = "UNKNOWN"
+
+
+ class UnkDr(BaseSparclException):
+     """The Data Release is not known or not supported."""
+
+     error_code = "UNKDR"
+
+
+ class ReadTimeout(BaseSparclException):
+     """The server did not send any data in the allotted amount of time."""
+
+     error_code = "RTIMEOUT"
+
+
+ class UnknownSparcl(BaseSparclException):
+     """Unknown SPARCL error. If this is ever raised (seen in a log),
+     create and use a new BaseSparclException subclass that is more
+     specific."""
+
+     error_code = "UNKSPARC"
+
+
+ class UnknownField(BaseSparclException):
+     """Unknown field name for a record."""
+
+     error_code = "UNKFIELD"
+
+
+ class NoCommonIdField(BaseSparclException):
+     """The field name for the Science id field is not common to all
+     Data Sets."""
+
+     error_code = "IDNOTCOM"
+
+
+ class ServerConnectionError(BaseSparclException):
+     error_code = "SRVCONER"
+
+
+ class BadSearchConstraint(BaseSparclException):
+     # NOTE: genSparclException dispatches on the server code "BADCONST",
+     # but instances constructed there keep this default code.
+     error_code = "BADSCONS"
+
+
+ class NoRecords(BaseSparclException):
+     """Results did not contain any records."""
+
+     error_code = "NORECORD"
+
+
+ class TooManyRecords(BaseSparclException):
+     """Too many records asked for in RETRIEVE."""
+
+     error_code = "TOOMANYR"
+
+
+ class NoIDs(BaseSparclException):
+     """The length of the list of original IDs passed to the reorder
+     method was zero."""
+
+     error_code = "NOIDS"
+
+
+ # error_code values should be no bigger than 8 characters 12345678
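As a usage sketch (not part of the package), genSparclException only needs an object exposing content and json(); the stand-in below is hypothetical:

    from types import SimpleNamespace
    import sparcl.exceptions as ex

    status = {"errorCode": "BADQUERY", "errorMessage": "bad cone search"}
    fake_response = SimpleNamespace(content=b"...", json=lambda: status)

    err = ex.genSparclException(fake_response)
    assert isinstance(err, ex.BadQuery)
    print(err)         # [BADQUERY] bad cone search
    d = err.to_dict()  # errorMessage, errorCode (plus traceback when set)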
sparcl/fields.py ADDED
@@ -0,0 +1,160 @@
+ """Get Field names associated with various SPARCL conditions."""
+ # Python Standard Library
+ from collections import defaultdict
+
+ # External Packages
+ import requests
+
+
+ def validate_fields(datafields):
+     # datafields is simply:
+     #   DataField.objects.all().values(*atts)
+
+     drs = set(df["data_release"] for df in datafields)
+     core = {
+         df["origdp"]: df["newdp"] for df in datafields if df["storage"] == "C"
+     }
+
+     o2n = {
+         dr: {
+             df["origdp"]: df["newdp"]
+             for df in datafields
+             if df["data_release"] == dr
+         }
+         for dr in drs
+     }
+
+     for dr, df in o2n.items():
+         # 1-1 mapping origdp <-> newdp across all DR
+         if len(set(df.values())) != len(df):
+             msg = (
+                 f"Data Release={dr} does not have a one-to-one mapping "
+                 f"between Original and Science field names."
+             )
+             raise Exception(msg)
+
+         acore = defaultdict(list)  # ambiguous core fields (more than one value)
+         for k in core.keys():
+             if df.get(k) != core.get(k):
+                 acore[k].append(df.get(k))
+         if len(acore) > 0:
+             msg = (
+                 f"DataFields do not have the same "
+                 f"Science field name for core values across all Data Sets. "
+                 f"{dict(acore)}"
+             )
+             raise Exception(msg)
+
+     return True
+
+
+ class Fields:  # Derived from a single query
+     """Lookup of Field Names"""
+
+     def __init__(self, apiurl):
+         # [rec, ...]
+         # where rec is a dict containing keys:
+         #   'data_release', 'origdp', 'newdp', 'storage', 'default', 'all'
+         datafields = requests.get(f"{apiurl}/datafields/").json()
+
+         validate_fields(datafields)
+
+         dr_list = set(df["data_release"] for df in datafields)
+
+         self.datafields = datafields
+         # o2n[DR][InternalName] => ScienceName
+         self.o2n = {
+             dr: {
+                 df["origdp"]: df["newdp"]
+                 for df in datafields
+                 if df["data_release"] == dr
+             }
+             for dr in dr_list
+         }
+         # n2o[DR][ScienceName] => InternalName
+         self.n2o = {
+             dr: {
+                 df["newdp"]: df["origdp"]
+                 for df in datafields
+                 if df["data_release"] == dr
+             }
+             for dr in dr_list
+         }
+         self.all_drs = dr_list
+         self.all_fields = set(df["newdp"] for df in datafields)
+
+         # Per DataRelease: get Storage, Default, All for each (user) fieldname
+         # attrs[DR][newdp] => dict[storage, default, all]
+         self.attrs = {
+             dr: {
+                 df["newdp"]: {
+                     "storage": df["storage"],
+                     "default": df["default"],
+                     "all": df["all"],
+                 }
+                 for df in datafields
+                 if df["data_release"] == dr
+             }
+             for dr in dr_list
+         }
+
+     @property
+     def all_datasets(self):
+         return self.all_drs
+
+     def _science_name(self, internal_name, dataset):
+         return self.o2n[dataset].get(internal_name)
+
+     def _internal_name(self, science_name, dataset):
+         #!return self.n2o[dataset][science_name]
+         return self.n2o[dataset].get(science_name)
+
+     def filter_fields(self, attr, dataset_list):
+         fields = set()
+         for dr in dataset_list:
+             for k, v in self.attrs[dr].items():
+                 if v.get(attr):
+                     fields.add(k)
+         return fields
+
+     def default_retrieve_fields(self, dataset_list=None):
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return self.filter_fields("default", dataset_list)
+
+     def all_retrieve_fields(self, dataset_list=None):
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return self.filter_fields("all", dataset_list)
+
+     def common(self, dataset_list=None):
+         """Science field names common to DATASET_LIST (or all datasets
+         if None)."""
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return sorted(
+             set.intersection(
+                 *[set(self.n2o[dr].keys()) for dr in dataset_list]
+             )
+         )
+
+     def common_internal(self, dataset_list=None):
+         """Internal field names common to DATASET_LIST (or all datasets
+         if None)."""
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return set.intersection(
+             *[set(self.o2n[dr].keys()) for dr in dataset_list]
+         )
+
+     # There is probably an algorithm to partition ELEMENTS into
+     # the _minimum_ number of SETS such that the union of all SETS
+     # contains all ELEMENTS. For now, partition by Data Set (when used).
+     def field_partitions(self, fields):
+         """Partition FIELDS into the DataSets that contain them"""
+         dr_fields = defaultdict(list)
+         for field in fields:
+             for dr in self.all_drs:
+                 if field in self.n2o[dr]:
+                     dr_fields[dr].append(field)
+         return dict(dr_fields)
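A usage sketch, assuming a SPARCL server exposing the /datafields/ endpoint (the URL below is a placeholder):

    from sparcl.fields import Fields

    flds = Fields("https://example.org/sparc")  # placeholder apiurl
    print(flds.all_datasets)                    # e.g. {'BOSS-DR16', ...}
    print(flds.common(["BOSS-DR16"]))           # Science names in one Data Set
    print(flds.default_retrieve_fields())       # fields flagged "default"
    print(flds.field_partitions(["flux"]))      # Data Sets containing 'flux'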
sparcl/gather_2d.py ADDED
@@ -0,0 +1,233 @@
+ """Align or resample spectra related fields across multiple records."""
+ # See client.py for a Doctest example.
+ #
+ # For info about problems with floating point,
+ # See: https://docs.python.org/3/tutorial/floatingpoint.html
+ # Also: https://docs.python.org/3/library/decimal.html#floating-point-notes
+ #
+ from decimal import Decimal
+
+ # External Packages
+ import numpy as np
+
+ # Local Packages
+ import sparcl.client
+
+
+ # Map every wavelength of every record to index (ri, wi)
+ # where
+ #   ri: Record Index
+ #   wi: Window Index (offset of wavelength in WINDOW)
+ #   window: ordered list of wavelengths that includes ALL unique
+ #           wavelengths in all records
+ #! def rec_woffset(records, window):
+ #!     ar = np.ones([len(records), len(window)])
+ #!     for ri, r in enumerate(records):
+ #!         for wl in r.wavelength:
+ #!             try:
+ #!                 wi = window.index(wl)
+ #!             except ValueError:
+ #!                 continue
+ #!             ar[ri, wi] = wl
+ #!     return ar
+
+
+ def _wavelength_offsets(records):
+     # sorted list of wavelengths from ALL records
+     window = sorted(
+         set(records[0].wavelength).union(*[r.wavelength for r in records[1:]])
+     )
+     # offsets[ri] = index into WINDOW
+     offsets = {
+         ri: window.index(rec.wavelength[0]) for ri, rec in enumerate(records)
+     }
+     return (window, offsets)
+
+
+ def _validate_wavelength_alignment(records, window, offsets, precision=None):
+     PLACES = Decimal(10) ** -precision if precision is not None else None
+     # Given an exact wavelength match between the first wl (wavelength) in a
+     # rec and the wl at its offset in WINDOW, ensure all the remaining wls
+     # in the rec match the next N wls of WINDOW.
+     for ri, rec in enumerate(records):
+         for wi, rwl in enumerate(rec.wavelength):  # wi=recWavelengthIndex
+             if precision is None:
+                 recwl = Decimal(rwl)
+             else:
+                 recwl = Decimal(rwl).quantize(PLACES)
+             wwl = window[offsets[ri] + wi]
+             if recwl != wwl:
+                 msg = (
+                     f"The spectra cannot be aligned with the given"
+                     f' "precision" parameter ({precision}).'
+                     f" Try lowering the precision value."
+                 )
+                 raise Exception(msg)
+
+
+ # We want to align a bunch of records by wavelength into a single
+ # 2d numpy array (record vs wavelength). In general, we are not
+ # guaranteed that this is possible -- even if using only records
+ # from a single DataSet. So validate it first.
+ # (If not valid, allowing wavelength slop might help.)
+ def _align_wavelengths(records):
+     window, offsets = _wavelength_offsets(records)
+     _validate_wavelength_alignment(records, window, offsets)
+     ar = np.ones([len(records), len(window)])
+     for ri, r in enumerate(records):
+         for wi, wl in enumerate(r.wavelength):
+             ar[ri, offsets[ri] + wi] = wl  # @@@WRONG!!! We want FLUX
+     return ar
+
+
+ def _tt1(numrecs=20, dr="BOSS-DR16"):
+     client = sparcl.client.SparclClient()
+     found = client.find(constraints=dict(data_release=[dr]), limit=numrecs)
+     got = client.retrieve(found.ids)
+     records = got.records
+     window, offsets = _wavelength_offsets(records)
+     print(f"Built window len={len(window)}; offsets={offsets}")
+     # return records, window, offsets
+     ar = _align_wavelengths(records)
+     return ar
+
+
+ # precision:: number of decimal places
+ # "records" must contain the "wavelength" field.
+ def _wavelength_grid_offsets(records, precision=11):
+     PLACES = Decimal(10) ** -precision
+
+     # set of wavelengths from ALL records, quantized to precision
+     gset = set()  # Grid SET
+     for r in records:
+         gset.update([Decimal(w).quantize(PLACES) for w in r.wavelength])
+     grid = sorted(gset)  # 1D sorted list of wavelengths (bigger than any rec)
+     # offsets[ri] = index into GRID
+     offsets = {
+         ri: grid.index(Decimal(rec.wavelength[0]).quantize(PLACES))
+         for ri, rec in enumerate(records)
+     }
+     return (grid, offsets)
+
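For illustration only (the wavelength values are made up): the Decimal quantization used above is what lets wavelengths that differ only in floating-point noise land on the same grid entry:

    from decimal import Decimal

    PLACES = Decimal(10) ** -7  # precision=7, as in align_records below
    a = Decimal(3650.0000000301).quantize(PLACES)
    b = Decimal(3650.0000000299).quantize(PLACES)
    assert a == b == Decimal("3650.0000000")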
+
+
+ # RETURN a 2D numpy array of FLUX values aligned to the wavelength GRID.
+ # GRID is generally wider than the flux of a single record. Pad with NaN.
+ def _flux_grid(records, grid, offsets, precision=None):
+     _validate_wavelength_alignment(records, grid, offsets, precision=precision)
+     ar = np.full([len(records), len(grid)], np.nan)
+     for ri, r in enumerate(records):
+         for fi, flux in enumerate(r.flux):
+             ar[ri, offsets[ri] + fi] = flux
+     return ar
+
+
+ # RETURN 2D nparray(records, wavelengthGrid) = fieldValue
+ def _field_grid(records, fieldName, grid, offsets, precision=None):
+     ar = np.full([len(records), len(grid)], np.nan)
+     for ri, r in enumerate(records):
+         for fi, fieldValue in enumerate(r[fieldName]):
+             ar[ri, offsets[ri] + fi] = fieldValue
+     return ar  # (records, wavelengthGrid)
+
+
+ # RETURN 2D nparray(fields, wavelengthGrid) = fieldValue
+ #! def rec_grid(rec, fields, grid, offsets, precision=None):
+ #!     ar = np.full([len(fields), len(grid)], np.nan)
+ #!     ri = 0
+ #!     for fi, fieldValue in enumerate(rec[fieldName]):
+ #!         ar[ri, offsets[ri] + fi] = fieldValue
+ #!     return ar  # (fields, wavelengthGrid)
+
+
+ # Align flux from records into one array using quantization.
+ #! def flux_records(records, precision=None):
+ #!     grid, offsets = _wavelength_grid_offsets(records, precision=precision)
+ #!     ar = _flux_grid(records, grid, offsets, precision=precision)
+ #!     return ar, np.array([float(x) for x in grid])
+
+
+ def _validate_spectra_fields(records, fields):
+     # Stub: does not yet validate anything.
+     #! spectra_fields = [
+     #!     client.fields.n2o["BOSS-DR16"][k]
+     #!     for k, v in client.fields.attrs["BOSS-DR16"].items()
+     #!     if v["storage"] == "S"
+     #! ]
+     [k for k in records[0].keys() if not k.startswith("_")]
+
+
+ # TOP level: intended for access from a Jupyter NOTEBOOK.
+ # Align spectra related fields from records into one array using quantization.
+ def align_records(records, fields=["flux", "wavelength"], precision=7):
+     """Align given spectra-type fields to a common wavelength grid.
+
+     Args:
+         records (list): List of dictionaries.
+             The keys for all these dictionaries are Science Field Names.
+
+         fields (:obj:`list`, optional): List of Science Field Names of
+             spectra related fields to align and include in the results.
+             DEFAULT=['flux', 'wavelength']
+
+         precision (:obj:`int`, optional): Number of decimal places to use
+             for quantizing wavelengths into a grid.
+             DEFAULT=7
+
+     Returns:
+         tuple containing:
+         - ar_dict(dict): Dictionary of 2D numpy arrays keyed by Field Name.
+           Each array has shape: (numRecs, numGridWavelengths)
+         - grid(ndarray): 1D numpy array containing wavelength values.
+
+     Example:
+         >>> client = sparcl.client.SparclClient()
+         >>> specflds = ['wavelength', 'model']
+         >>> cons = {"data_release": ['BOSS-DR16']}
+         >>> found = client.find(constraints=cons, limit=21)
+         >>> got = client.retrieve(found.ids, include=specflds)
+         >>> ar_dict, grid = align_records(got.records, fields=specflds)
+         >>> ar_dict['model'].shape
+         (21, 4666)
+
+     """
+     # Report Garbage In
+     if "wavelength" not in fields:
+         msg = (
+             f'You must provide "wavelength" in the list provided'
+             f' in the "fields" parameter. Got: {fields}'
+         )
+         raise Exception(msg)
+     if "wavelength" not in records[0]:
+         msg = (
+             f'Records must contain the "wavelength" field.'
+             f" The first record contains fields: {sorted(records[0].keys())}"
+         )
+         raise Exception(msg)
+
+     #! _validate_spectra_fields(records, fields)
+     grid, offsets = _wavelength_grid_offsets(records, precision=precision)
+     _validate_wavelength_alignment(records, grid, offsets, precision=precision)
+
+     # One slice for each field; each slice is a 2d array (record, wavelength).
+     adict = dict()
+     for fld in fields:
+         ar = _field_grid(records, fld, grid, offsets, precision=None)
+         adict[fld] = ar
+
+     return adict, np.array([float(x) for x in grid])
+
+
+ # with np.printoptions(threshold=np.inf, linewidth=210,
+ #     formatter=dict(float=lambda v: f'{v: >7.3f}')): print(ar.T)  # noqa: E501
+
+ if __name__ == "__main__":
+     import doctest
+
+     doctest.testmod()
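Because align_records pads with NaN wherever a record does not cover the full grid, downstream math on the returned arrays should be NaN-aware. A small self-contained sketch (synthetic values, not SPARCL data):

    import numpy as np

    # Stand-in for ar_dict['flux']: 2 records on a 4-point grid, NaN-padded.
    flux = np.array([[1.0, 2.0, 3.0, np.nan],
                     [np.nan, 2.5, 3.5, 4.5]])
    mean_flux = np.nanmean(flux, axis=0)                  # per-grid-point mean
    coverage = np.count_nonzero(~np.isnan(flux), axis=0)  # records per point
    print(mean_flux)   # [1.   2.25 3.25 4.5 ]
    print(coverage)    # [1 2 2 1]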