sparclclient 1.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparcl/Results.py ADDED
@@ -0,0 +1,234 @@
1
+ """Containers for results from SPARCL Server.
2
+ These include results of client.retrieve() client.find().
3
+ """
4
+
5
+ from collections import UserList
6
+
7
+ #!import copy
8
+ from sparcl.utils import _AttrDict
9
+
10
+ # from sparcl.gather_2d import bin_spectra_records
11
+ import sparcl.exceptions as ex
12
+ from warnings import warn
13
+
14
+
15
class Results(UserList):
    """List-like container for results returned by the SPARCL server.

    The raw server response is a list whose first element is a header
    dict and whose remaining elements are data records. On construction
    the records are renamed from Internal to Science field names.
    """

    def __init__(self, dict_list, client=None):
        """Wrap DICT_LIST (header followed by records).

        Args:
            dict_list (:obj:`list`): Header dict followed by record dicts.
                Must be non-empty (the header is required).
            client: SparclClient instance supplying the field-name
                mappings used for renaming.
        """
        super().__init__(dict_list)
        self.hdr = dict_list[0]
        self.recs = dict_list[1:]
        self.client = client
        self.fields = client.fields
        self.to_science_fields()

        # HACK 12/14/2023 -sp- to fix UUID problem presumably
        # produced on stack version upgrade (to Django 4.2, postgres 13+)
        # Done per AB for expediency since real solution will be easier
        # after field-renaming is removed.
        for rec in self.recs:
            if "sparcl_id" in rec:
                rec["sparcl_id"] = str(rec["sparcl_id"])
        # END __init__()

    # https://docs.python.org/3/library/collections.html#collections.deque.clear
    def clear(self):
        """Delete the contents of this collection."""
        super().clear()
        self.hdr = {}
        self.recs = []

    @property
    def info(self):
        """Info about this collection.
        e.g. Warnings, parameters used to get the collection, etc."""
        return self.hdr

    @property
    def count(self):
        """Number of records in this collection."""
        return len(self.recs)

    @property
    def records(self):
        """Records in this collection. Each record is a dictionary."""
        return self.recs

    def json(self):
        """Return the underlying raw list (header + records)."""
        return self.data

    # Convert Internal field names to Science field names.
    # SIDE-EFFECT: modifies self.recs
    def to_science_fields(self):  # from_orig
        """Rename record keys from Internal to Science names in place.

        Records containing any field with no Science-name mapping are
        dropped entirely.
        """
        newrecs = list()
        for rec in self.recs:
            newrec = dict()
            dr = rec["_dr"]
            keep = True
            for orig in rec.keys():
                if orig == "_dr":
                    # keep DR around unchanged. We need it to rename back
                    # to Internal Field Names later.
                    newrec[orig] = rec[orig]
                else:
                    new = self.fields._science_name(orig, dr)
                    if new is None:
                        keep = False  # We don't have name mapping, toss rec
                    newrec[new] = rec[orig]
            if keep:
                newrecs.append(_AttrDict(newrec))
        self.recs = newrecs

    # Convert Science field names to Internal field names.
    def to_internal_fields(self):
        """Rename record keys from Science to Internal names in place.

        SIDE-EFFECT: modifies the record dicts in self.recs.
        """
        for rec in self.recs:
            dr = rec.get("_dr")
            # Snapshot the keys: we rename entries while traversing, and
            # mutating a dict during keys() iteration raises RuntimeError.
            for sci_name in list(rec.keys()):
                if sci_name == "_dr":
                    # keep DR around unchanged. We need it to rename back
                    # to Internal Field Names later.
                    continue
                # BUG FIX: previously the translated name was used as both
                # the assignment target AND the pop() key, so the science
                # key was never removed (or pop() raised KeyError).
                internal = self.fields._internal_name(sci_name, dr)
                rec[internal] = rec.pop(sci_name)

    def science_to_internal_fields(self):
        """Rebuild records with Internal field names and return them.

        Unlike to_internal_fields(), this constructs new record dicts
        and drops any record containing an un-mappable field.
        SIDE-EFFECT: replaces self.recs.
        """
        newrecs = list()
        for rec in self.recs:
            newrec = dict()
            dr = rec["_dr"]
            keep = True
            for sci_name in rec.keys():
                if sci_name == "_dr":
                    # keep DR around unchanged. We need it to rename back
                    # to Internal Field Names later.
                    newrec[sci_name] = rec[sci_name]
                else:
                    new = self.fields._internal_name(sci_name, dr)
                    if new is None:
                        keep = False
                    newrec[new] = rec[sci_name]
            if keep:
                newrecs.append(_AttrDict(newrec))
        self.recs = newrecs
        return self.recs

    def reorder(self, ids_og):
        """
        Reorder the retrieved records to be in the same
        order as the original IDs passed to client.retrieve().

        Args:
            ids_og (:obj:`list`): List of sparcl_ids or specIDs.

        Returns:
            reordered (:class:`~sparcl.Results.Retrieved`): Contains header and
                reordered records.

        Raises:
            ex.NoIDs: ids_og is empty.
            ex.NoRecords: this collection has no records.
        """
        if len(ids_og) <= 0:
            msg = (
                f"The list of IDs passed to the reorder method "
                f"does not contain any sparcl_ids or specIDs."
            )
            raise ex.NoIDs(msg)
        elif len(self.recs) <= 0:
            msg = (
                "The retrieved or found results did not "
                "contain any records."
            )
            raise ex.NoRecords(msg)
        else:
            # Transform science fields to internal fields
            new_recs = self.science_to_internal_fields()
            # Get the ids or specids from retrieved records.
            # NOTE(review): if ids_og[0] is neither str nor int, ids_re
            # is never bound and a NameError follows — verify callers
            # only pass sparcl_ids (str) or specids (int).
            if type(ids_og[0]) is str:
                ids_re = [f["sparcl_id"] for f in new_recs]
            elif type(ids_og[0]) is int:
                ids_re = [f["specid"] for f in new_recs]
            # Enumerate the original ids
            dict_og = {x: i for i, x in enumerate(ids_og)}
            # Enumerate the retrieved ids
            dict_re = {x: i for i, x in enumerate(ids_re)}
            # Get the indices of the original ids. Set to None if not found
            idx = [dict_re.get(key, None) for key in dict_og.keys()]
            # Get the indices of None values
            none_idx = [i for i, v in enumerate(idx) if v is None]
            # Reorder the retrieved records
            reordered = [self.recs[i] for i in idx if i is not None]
            # Insert dummy record(s) if applicable
            dummy_record = "{'id': None, 'specid': None, '_dr': 'SDSS-DR16'}"
            for i in none_idx:
                reordered.insert(
                    i, {"sparcl_id": None, "specid": None, "_dr": "SDSS-DR16"}
                )
            reordered.insert(0, self.hdr)
            meta = reordered[0]
            if len(none_idx) > 0:
                msg = (
                    f"{len(none_idx)} sparcl_ids or specIDs were "
                    f"not found in "
                    f'the database. Use "client.missing()" '
                    f"to get a list of the unavailable IDs. "
                    f"To maintain correct reordering, a dummy "
                    f"record has been placed at the indices "
                    f"where no record was found. Those "
                    f"indices are: {none_idx}. The dummy "
                    f"record will appear as follows: "
                    f"{dummy_record}. "
                )
                meta["status"].update({"warnings": [msg]})
                warn(msg, stacklevel=2)
            return Results(reordered, client=self.client)
182
+
183
+
184
+ # For results of retrieve()
185
class Retrieved(Results):
    """Holds spectra records (and header)."""

    def __init__(self, dict_list, client=None):
        super().__init__(dict_list, client=client)

    def __repr__(self):
        record_count = len(self.recs)
        return f"Retrieved Results: {record_count} records"
193
+
194
+
195
+ #! def bin_spectra(self):
196
+ #! """Align flux from all records by common wavelength bin.
197
+ #!
198
+ #! A value of nan is used where a record does not contain a flux
199
+ #! value for a specific bin.
200
+ #!
201
+ #! Returns:
202
+ #! flux: 2d numpy array with shape (numRecords, numWavelengthBins)
203
+ #! Flux value for each record, each bin
204
+ #! wavs: 1d numpy array with shape (numWavelengthBins)
205
+ #! Wavelength values for each bin
206
+ #!
207
+ #! Example:
208
+ #! >>> client = sparcl.client.SparclClient()
209
+ #! >>> found = client.find(
210
+ #! constraints={"data_release": ['BOSS-DR16']},
211
+ #! limit=10)
212
+ #! >>> got = client.retrieve(found.ids)
213
+ #! >>> flux2d,wavs = got.bin_spectra()
214
+ #!
215
+ #! """
216
+ #! flux2d, wavs = bin_spectra_records(self.recs)
217
+ #! return flux2d, wavs
218
+
219
+
220
class Found(Results):
    """Holds metadata records (and header)."""

    def __init__(self, dict_list, client=None):
        super().__init__(dict_list, client=client)

    def __repr__(self):
        record_count = len(self.recs)
        return f"Find Results: {record_count} records"

    @property
    def ids(self):
        """List of unique identifiers of matched records."""
        #! dr = list(self.fields.all_drs)[0]

        return [record.get("sparcl_id") for record in self.recs]
sparcl/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """
2
+ A client for getting spectra and meta-data from NOIRLab.
3
+ """
4
+
5
+
6
+ # List of packages to import when "from sparcl import *" is used
7
+ __all__ = ["client", "align_records"]
8
+
9
+
10
+ # See semantic versioning
11
+
12
+ # BUT PyPi requires honoring versions like this:
13
+ # https://packaging.python.org/specifications/core-metadata/
14
+ # https://www.python.org/dev/peps/pep-0440/
15
+ #
16
+ # '0.3.0-alpha3.23' is an invalid value for Version.
17
+ # Error: Start and end with a letter or numeral containing only ASCII
18
+ # numeric and '.', '_' and '-'.
19
+ #
20
+ # https://semver.org/ yields possible versions that violate PEP-0440
21
+
22
+ # __version__ = '0.3.21'
23
+ # __version__ = '0.1a3.dev22'
24
+ # __version__ = '0.3.0-alpha3.23'
25
+ # __version__ = '0.3.22'
26
+
27
+ # must mach: [N!]N(.N)*[{a|b|rc}N][.postN][.devN]
28
+ # Example of a correct version string: '0.4.0a3.dev35'
29
+ # __version__ = '1.1'
30
+ # __version__ = '1.2.0b4'
31
+ # __version__ = '1.2.0' # Release
32
+ #__version__ = "1.2.1b3"
33
+ __version__ = "1.2.1"
File without changes
@@ -0,0 +1,337 @@
1
+ #! /usr/bin/env python
2
+ """Benchmark speed of SPARC spectra retrieve with various parameters.
3
+ """
4
+ # EXAMPLES:
5
+ # cd ~/sandbox/sparclclient
6
+ # python3 -m sparcl.benchmarks.benchmarks ~/data/sparc/sids5.list
7
+ # python3 -m sparcl.benchmarks.benchmarks ~/data/sparc/sids644.list
8
+
9
+ # Alice reported 22 minutes on 64K retrieved from specClient (rate=48 spec/sec)
10
+ # slack.spectro: 3/31/2021
11
+
12
+ # Standard Python library
13
+ import argparse
14
+ import logging
15
+ import os
16
+ from pprint import pformat
17
+
18
+ # External packages
19
+ import psutil
20
+
21
+ # Local packages
22
+ from ..client import SparclClient
23
+ from ..utils import tic, toc, here_now
24
+
25
+ # rooturl = 'http://localhost:8030/' #@@@
26
+ rooturl = "http://sparc1.datalab.noirlab.edu:8000/"
27
+
28
+
29
def human_size(num, units=("b", "KB", "MB", "GB", "TB", "PB", "EB")):
    """Return a human readable string representation of NUM.

    Recursively scales NUM down by 1024, consuming one unit label per
    division, until it fits below one unit step.

    Args:
        num: Byte count (int or float).
        units: Sequence of unit labels; units[0] labels the current scale.

    Returns:
        str: e.g. ``"1.4 MB"``.
    """
    # BUG FIX: the threshold was 1024 but the divisor was 1000, mixing
    # binary and decimal scaling so values drifted at each step. Use
    # 1024 consistently. Also stop when only one unit label remains to
    # avoid an IndexError on very large numbers. The default is a tuple
    # to avoid the mutable-default-argument pitfall.
    if num < 1024 or len(units) == 1:
        return f"{num:.1f} {units[0]}"
    return human_size(num / 1024, units[1:])
36
+
37
+
38
+ # with open('/data/sparc/sids5.list') as f:
39
+ # specids = [int(line.strip()) for line in f if not line.startswith('#')]
40
def run_retrieve(specids, columns=None, xfer="p", verbose=True):
    """Retrieve SPECIDS in a single call and gather timing statistics."""
    #!print(f'Retrieving {len(specids):,} spectra')
    psutil.cpu_percent()  # begin interval
    sparcl_client = SparclClient(url=rooturl)
    stats = dict(numcols=len(columns), numspecids=len(specids))
    if verbose:
        print(f"Experiment: {pformat(stats)}")
    tic()
    records = sparcl_client.retrieve(specids, columns=columns, xfer=xfer)
    seconds = toc()
    #!cpu = psutil.cpu_percent(interval=1)
    if verbose:
        print(f"len(specids)={len(specids)} len(data)={len(records)}")
    assert len(specids) == len(records)  # @@@ but some of ingest may have failed
    assert len(records[0]["spectra__coadd__flux"]) > 1000
    stats.update(
        elapsed=seconds,
        retrieved=len(records),
        rate=len(records) / seconds,
        end_smrem=psutil.swap_memory().free,
        end_vmrem=psutil.virtual_memory().available,
        end_cpuload=os.getloadavg()[1],
        end_cpuperc=psutil.cpu_percent(),  # end interval
    )
    return stats
65
+
66
+
67
def run_paged_retrieve(
    specids, columns=None, xfer="p", page=5000, verbose=True, keepall=False
):
    """Do 1 more more PAGE size retrieves to get data for all specids.

    Args:
        specids (list): specids to retrieve.
        columns (list): Column names to fetch for each spectrum.
        xfer (str): Transfer encoding ('p'=pickle, 'j'=json).
        page (int): Maximum number of specids per server round-trip.
        verbose (bool): Print progress diagnostics.
        keepall (bool): Accumulate all retrieved records in memory.

    Returns:
        dict: Timing and client host-load statistics for the whole run.
    """
    print(f"Paged Retrieve of {len(specids):,} spectra")
    psutil.cpu_percent()  # begin interval
    client = SparclClient(url=rooturl)
    result = dict(
        numcols=len(columns), numspecids=len(specids), xfer=xfer, page=page
    )
    if verbose:
        print(f"Experiment: {pformat(result)}")

    data = []
    datacnt = 0
    tic()
    for cnt in range(0, len(specids), page):
        pdata = client.retrieve(
            specids[cnt : cnt + page], columns=columns, xfer=xfer
        )
        datacnt += len(pdata)
        if keepall:
            data.extend(pdata)
    elapsed = toc()

    #! cpu = psutil.cpu_percent(interval=1)
    if verbose:
        print(f"len(specids)={len(specids)} datacnt={datacnt}")
    # assert len(specids) == len(data) # @@@but some ingests may have failed
    #!assert len(data[0]['spectra__coadd__flux']) > 1000 # @@@
    result.update(
        elapsed=elapsed,
        # BUG FIX: 'retrieved' previously reported len(data), which is
        # always 0 unless keepall=True even though records were fetched.
        retrieved=datacnt,
        rate=datacnt / elapsed,
        end_smrem=psutil.swap_memory().free,
        end_vmrem=psutil.virtual_memory().available,
        end_cpuload=os.getloadavg()[1],
        end_cpuperc=psutil.cpu_percent(),  # end interval
    )
    return result
107
+
108
+
109
# flux,loglam,ivar,and_mask,or_mask,wdisp,sky,model
allcols = [
    "flux",
    "loglam",
    "ivar",
    "and_mask",
    "or_mask",
    "wdisp",
    "sky",
    "model",
]

# Each experiment sweeps transfer methods, specid counts, and the
# number of columns requested per spectrum.
experiment_0 = dict(
    xfers=["p"],
    specidcnts=[600, 60],
    numcols=range(1, 3),
)
experiment_1 = dict(
    xfers=["p"],
    specidcnts=[6, 60, 600, 6000, 30000],
    numcols=range(1, 3),
    # numcols=range(1,len(allcols)+1),
)
experiment_2 = dict(
    xfers=["p"],
    specidcnts=[1000, 100, 10],
    numcols=range(1, len(allcols) + 1),
)
experiment_3 = dict(
    xfers=["p"],
    specidcnts=[1000],
    numcols=reversed(range(1, len(allcols) + 1)),
)

experiment_8 = dict(
    xfers=["p"],
    specidcnts=[65000],
    numcols=[1, 2, 8],
)
experiment_9 = dict(
    xfers=["p", "j"],
    specidcnts=sorted({min(7 * 10**x, 65000) for x in range(6)}),
    numcols=range(1, len(allcols) + 1),
)
159
+
160
+
161
def run_trials(allspecids, verbose=True):
    """Run the selected experiment grid over ALLSPECIDS and report."""
    # ex = experiment_9 #@@@
    ex = experiment_8  # @@@

    xfers = ex["xfers"]
    specidcnts = ex["specidcnts"]
    numcols = ex["numcols"]

    klist = ["elapsed", "numcols", "numspecids", "page", "rate", "xfer"]

    trials = []
    for xfer in xfers:
        for ncols in numcols:
            cols = allcols[:ncols]
            for specidcnt in specidcnts:
                specids = allspecids[:specidcnt]
                #!result = run_retrieve(specids, columns=cols, xfer='p')
                result = run_paged_retrieve(specids, columns=cols, xfer="p")
                if verbose:
                    # print(f'Run-Result: {pformat(result)}')
                    reduced = {
                        k: v for k, v in result.items() if k in klist
                    }
                    print(f"Run-Result: {reduced}")
                trials.append(result)
        report(trials, len(allspecids), xfer=xfer)
    return trials
188
+
189
+
190
def report(results, specidcnt, xfer=None, bandwidth=False):
    """Print a human-readable summary of benchmark RESULTS.

    Args:
        results (list): Stat dicts as returned by run_paged_retrieve().
        specidcnt (int): Total number of specids available to the run.
        xfer (str): Transfer method used; 'p' prints "Pickle", anything
            else prints "JSON".
        bandwidth (bool): Bandwidth measurement (currently disabled).

    Returns:
        str: The literal string "Done".
    """
    hostname, now = here_now()
    min1, min5, min15 = os.getloadavg()
    #!smrem = psutil.swap_memory().free
    #!vmrem = psutil.virtual_memory().available
    #!cpuperc = psutil.cpu_percent(interval=1)

    # NOTE(review): if bandwidth=True, dl_speed is never assigned and
    # the print below raises NameError — the speedtest branch is
    # commented out. Confirm bandwidth=True is never used.
    if bandwidth:
        pass
        #! s = speedtest.Speedtest()
        #! ul_speed = s.upload(threads=1)
        #! dl_speed = s.download(threads=1)
    else:
        #! ul_speed = 0
        dl_speed = 0

    #! Upload speed: {human_size(ul_speed)}
    print(f"\nBenchmark run on {hostname} at {now} with {specidcnt} specids.")
    print(
        f"""
    Transfer Method: {"Pickle" if xfer=='p' else "JSON"}
    Download speed: {human_size(dl_speed)}
    """
    )
    # Load Avg: {min5:.1f}
    # (avg num processes running over last 5 minutes)
    # CPU utilization: {cpuperc:.0f}%
    # Swap Mem Avail: {human_size(smrem)}
    # Virtual Mem Avail: {human_size(vmrem)}
    # (Above statistics are for CLIENT.)

    # Table header: one row per trial below.
    #!print(f'Column\tSID\tRate \tAvg \tCPU \tSwap\tVirt')
    #!print(f' Count\tCnt\ts/sec\tLoad\tUtil\t Mem\t Mem')
    #!print(f'------\t---\t-----\t----\t----')
    print(f"Column\tSID\tRate ")
    print(f" Count\tCnt\ts/sec")
    print(f"------\t---\t-----")
    for r in results:
        # str.format(**r) pulls numcols/numspecids/rate from each stats dict.
        print(
            (
                "{numcols}\t"
                "{numspecids}\t"
                "{rate:.0f}\t"
                #!"{end_cpuload:.02f}\t"
                #!"{end_cpuperc:.0f}%\t"
                #!"{smrem}\t"
                #!"{vmrem}\t"
            ).format(**r)
        )
        # smrem=human_size(r['end_smrem']),
        # vmrem=human_size(r['end_vmrem']),
    print(
        """
    LEGEND:
    Rate:: spectra/second
    Transfer method:: Pickle, Json
    Load:: Number of processes in system run queue averaged over last 5 minutes.
    """
    )
    return "Done"
250
+
251
+
252
+ ##############################################################################
253
+
254
+
255
def my_parser():
    """Build the command-line argument parser for the benchmark script.

    Returns:
        argparse.ArgumentParser: Parser accepting a specids file plus
        --cols, --xfer, and --loglevel options.
    """
    parser = argparse.ArgumentParser(
        #!version='1.0.1',
        description="My shiny new python program",
        epilog='EXAMPLE: %(prog)s a b"',
    )
    allcols = [
        "flux",
        "loglam",
        "ivar",
        "and_mask",
        "or_mask",
        "wdisp",
        "sky",
        "model",
    ]
    #!dftcols = 'flux,loglam'
    dftcols = ",".join(allcols)
    parser.add_argument(
        "specids",
        type=argparse.FileType("r"),
        help=("File containing list of " "specobjids. One per line."),
    )
    parser.add_argument(
        "--cols",
        # choices=allcols,
        default=dftcols,
        # BUG FIX: "seperated" -> "separated" in user-facing help text.
        help=(
            f"List of comma separated columns to get. " f'Default="{dftcols}"'
        ),
    )
    parser.add_argument(
        "--xfer",
        default="p",
        help="Mode to use to transfer from Server to Client.",
    )

    parser.add_argument(
        "--loglevel",
        help="Kind of diagnostic output",
        # BUG FIX: "CRTICAL" typo made --loglevel CRITICAL invalid.
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"],
        default="WARNING",
    )
    return parser
299
+
300
+
301
def main():
    """Entry point: parse args, configure logging, load specids, run."""
    parser = my_parser()
    args = parser.parse_args()
    # Close the argparse-opened file handle; we only need its path and
    # will re-open it ourselves below.
    args.specids.close()
    args.specids = args.specids.name

    log_level = getattr(logging, args.loglevel.upper(), None)
    if not isinstance(log_level, int):
        parser.error("Invalid log level: %s" % args.loglevel)
    logging.basicConfig(
        level=log_level,
        format="%(levelname)s %(message)s",
        datefmt="%m-%d %H:%M",
    )
    logging.debug("Debug output is enabled!!!")

    # One integer specid per non-comment line.
    with open(args.specids, "r") as fin:
        specids = [
            int(line.strip()) for line in fin if not line.startswith("#")
        ]
    #! cols = args.cols.split(',')
    # print(f'specids count={len(specids)} columns={cols}')

    # run_retrieve(specids, columns=cols, xfer='p')
    print(f"Starting benchmark on {here_now()}")
    #! all = run_trials(specids)
    print(f"Finished benchmark on {here_now()}")
331
+
332
def foo(x):
    """Unused placeholder; accepts X and returns None."""
    return None
334
+
335
+
336
# Script entry point: parse arguments and run the benchmark.
if __name__ == "__main__":
    main()