sparclclient 1.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparcl/Results.py ADDED
@@ -0,0 +1,234 @@
1
+ """Containers for results from SPARCL Server.
2
+ These include results of client.retrieve() client.find().
3
+ """
4
+
5
+ from collections import UserList
6
+
7
+ #!import copy
8
+ from sparcl.utils import _AttrDict
9
+
10
+ # from sparcl.gather_2d import bin_spectra_records
11
+ import sparcl.exceptions as ex
12
+ from warnings import warn
13
+
14
+
15
class Results(UserList):
    """List-like container for results returned by the SPARCL server.

    The raw server response is a list whose first element is a header
    dict and whose remaining elements are data records. On construction
    the records are renamed from Internal to Science field names.
    """

    def __init__(self, dict_list, client=None):
        """Wrap DICT_LIST (header followed by records).

        Args:
            dict_list (:obj:`list`): Header dict followed by record dicts.
                Must be non-empty (the header is required).
            client: SparclClient instance supplying the field-name
                mappings used for renaming.
        """
        super().__init__(dict_list)
        self.hdr = dict_list[0]
        self.recs = dict_list[1:]
        self.client = client
        self.fields = client.fields
        self.to_science_fields()

        # HACK 12/14/2023 -sp- to fix UUID problem presumably
        # produced on stack version upgrade (to Django 4.2, postgres 13+)
        # Done per AB for expediency since real solution will be easier
        # after field-renaming is removed.
        for rec in self.recs:
            if "sparcl_id" in rec:
                rec["sparcl_id"] = str(rec["sparcl_id"])
        # END __init__()

    # https://docs.python.org/3/library/collections.html#collections.deque.clear
    def clear(self):
        """Delete the contents of this collection."""
        super().clear()
        self.hdr = {}
        self.recs = []

    @property
    def info(self):
        """Info about this collection.
        e.g. Warnings, parameters used to get the collection, etc."""
        return self.hdr

    @property
    def count(self):
        """Number of records in this collection."""
        return len(self.recs)

    @property
    def records(self):
        """Records in this collection. Each record is a dictionary."""
        return self.recs

    def json(self):
        """Return the underlying raw list (header + records)."""
        return self.data

    # Convert Internal field names to Science field names.
    # SIDE-EFFECT: modifies self.recs
    def to_science_fields(self):  # from_orig
        """Rename record keys from Internal to Science names in place.

        Records containing any field with no Science-name mapping are
        dropped entirely.
        """
        newrecs = list()
        for rec in self.recs:
            newrec = dict()
            dr = rec["_dr"]
            keep = True
            for orig in rec.keys():
                if orig == "_dr":
                    # keep DR around unchanged. We need it to rename back
                    # to Internal Field Names later.
                    newrec[orig] = rec[orig]
                else:
                    new = self.fields._science_name(orig, dr)
                    if new is None:
                        keep = False  # We don't have name mapping, toss rec
                    newrec[new] = rec[orig]
            if keep:
                newrecs.append(_AttrDict(newrec))
        self.recs = newrecs

    # Convert Science field names to Internal field names.
    def to_internal_fields(self):
        """Rename record keys from Science to Internal names in place.

        SIDE-EFFECT: modifies the record dicts in self.recs.
        """
        for rec in self.recs:
            dr = rec.get("_dr")
            # Snapshot the keys: we rename entries while traversing, and
            # mutating a dict during keys() iteration raises RuntimeError.
            for sci_name in list(rec.keys()):
                if sci_name == "_dr":
                    # keep DR around unchanged. We need it to rename back
                    # to Internal Field Names later.
                    continue
                # BUG FIX: previously the translated name was used as both
                # the assignment target AND the pop() key, so the science
                # key was never removed (or pop() raised KeyError).
                internal = self.fields._internal_name(sci_name, dr)
                rec[internal] = rec.pop(sci_name)

    def science_to_internal_fields(self):
        """Rebuild records with Internal field names and return them.

        Unlike to_internal_fields(), this constructs new record dicts
        and drops any record containing an un-mappable field.
        SIDE-EFFECT: replaces self.recs.
        """
        newrecs = list()
        for rec in self.recs:
            newrec = dict()
            dr = rec["_dr"]
            keep = True
            for sci_name in rec.keys():
                if sci_name == "_dr":
                    # keep DR around unchanged. We need it to rename back
                    # to Internal Field Names later.
                    newrec[sci_name] = rec[sci_name]
                else:
                    new = self.fields._internal_name(sci_name, dr)
                    if new is None:
                        keep = False
                    newrec[new] = rec[sci_name]
            if keep:
                newrecs.append(_AttrDict(newrec))
        self.recs = newrecs
        return self.recs

    def reorder(self, ids_og):
        """
        Reorder the retrieved records to be in the same
        order as the original IDs passed to client.retrieve().

        Args:
            ids_og (:obj:`list`): List of sparcl_ids or specIDs.

        Returns:
            reordered (:class:`~sparcl.Results.Retrieved`): Contains header and
                reordered records.

        Raises:
            ex.NoIDs: ids_og is empty.
            ex.NoRecords: this collection has no records.
        """
        if len(ids_og) <= 0:
            msg = (
                f"The list of IDs passed to the reorder method "
                f"does not contain any sparcl_ids or specIDs."
            )
            raise ex.NoIDs(msg)
        elif len(self.recs) <= 0:
            msg = (
                "The retrieved or found results did not "
                "contain any records."
            )
            raise ex.NoRecords(msg)
        else:
            # Transform science fields to internal fields
            new_recs = self.science_to_internal_fields()
            # Get the ids or specids from retrieved records.
            # NOTE(review): if ids_og[0] is neither str nor int, ids_re
            # is never bound and a NameError follows — verify callers
            # only pass sparcl_ids (str) or specids (int).
            if type(ids_og[0]) is str:
                ids_re = [f["sparcl_id"] for f in new_recs]
            elif type(ids_og[0]) is int:
                ids_re = [f["specid"] for f in new_recs]
            # Enumerate the original ids
            dict_og = {x: i for i, x in enumerate(ids_og)}
            # Enumerate the retrieved ids
            dict_re = {x: i for i, x in enumerate(ids_re)}
            # Get the indices of the original ids. Set to None if not found
            idx = [dict_re.get(key, None) for key in dict_og.keys()]
            # Get the indices of None values
            none_idx = [i for i, v in enumerate(idx) if v is None]
            # Reorder the retrieved records
            reordered = [self.recs[i] for i in idx if i is not None]
            # Insert dummy record(s) if applicable
            dummy_record = "{'id': None, 'specid': None, '_dr': 'SDSS-DR16'}"
            for i in none_idx:
                reordered.insert(
                    i, {"sparcl_id": None, "specid": None, "_dr": "SDSS-DR16"}
                )
            reordered.insert(0, self.hdr)
            meta = reordered[0]
            if len(none_idx) > 0:
                msg = (
                    f"{len(none_idx)} sparcl_ids or specIDs were "
                    f"not found in "
                    f'the database. Use "client.missing()" '
                    f"to get a list of the unavailable IDs. "
                    f"To maintain correct reordering, a dummy "
                    f"record has been placed at the indices "
                    f"where no record was found. Those "
                    f"indices are: {none_idx}. The dummy "
                    f"record will appear as follows: "
                    f"{dummy_record}. "
                )
                meta["status"].update({"warnings": [msg]})
                warn(msg, stacklevel=2)
            return Results(reordered, client=self.client)
182
+
183
+
184
+ # For results of retrieve()
185
class Retrieved(Results):
    """Holds spectra records (and header)."""

    def __init__(self, dict_list, client=None):
        super().__init__(dict_list, client=client)

    def __repr__(self):
        record_count = len(self.recs)
        return f"Retrieved Results: {record_count} records"
193
+
194
+
195
+ #! def bin_spectra(self):
196
+ #! """Align flux from all records by common wavelength bin.
197
+ #!
198
+ #! A value of nan is used where a record does not contain a flux
199
+ #! value for a specific bin.
200
+ #!
201
+ #! Returns:
202
+ #! flux: 2d numpy array with shape (numRecords, numWavelengthBins)
203
+ #! Flux value for each record, each bin
204
+ #! wavs: 1d numpy array with shape (numWavelengthBins)
205
+ #! Wavelength values for each bin
206
+ #!
207
+ #! Example:
208
+ #! >>> client = sparcl.client.SparclClient()
209
+ #! >>> found = client.find(
210
+ #! constraints={"data_release": ['BOSS-DR16']},
211
+ #! limit=10)
212
+ #! >>> got = client.retrieve(found.ids)
213
+ #! >>> flux2d,wavs = got.bin_spectra()
214
+ #!
215
+ #! """
216
+ #! flux2d, wavs = bin_spectra_records(self.recs)
217
+ #! return flux2d, wavs
218
+
219
+
220
class Found(Results):
    """Holds metadata records (and header)."""

    def __init__(self, dict_list, client=None):
        super().__init__(dict_list, client=client)

    def __repr__(self):
        record_count = len(self.recs)
        return f"Find Results: {record_count} records"

    @property
    def ids(self):
        """List of unique identifiers of matched records."""
        #! dr = list(self.fields.all_drs)[0]

        return [record.get("sparcl_id") for record in self.recs]
sparcl/__init__.py ADDED
@@ -0,0 +1,33 @@
1
+ """
2
+ A client for getting spectra and meta-data from NOIRLab.
3
+ """
4
+
5
+
6
+ # List of packages to import when "from sparcl import *" is used
7
+ __all__ = ["client", "align_records"]
8
+
9
+
10
+ # See semantic versioning
11
+
12
+ # BUT PyPi requires honoring versions like this:
13
+ # https://packaging.python.org/specifications/core-metadata/
14
+ # https://www.python.org/dev/peps/pep-0440/
15
+ #
16
+ # '0.3.0-alpha3.23' is an invalid value for Version.
17
+ # Error: Start and end with a letter or numeral containing only ASCII
18
+ # numeric and '.', '_' and '-'.
19
+ #
20
+ # https://semver.org/ yields possible versions that violate PEP-0440
21
+
22
+ # __version__ = '0.3.21'
23
+ # __version__ = '0.1a3.dev22'
24
+ # __version__ = '0.3.0-alpha3.23'
25
+ # __version__ = '0.3.22'
26
+
27
+ # must mach: [N!]N(.N)*[{a|b|rc}N][.postN][.devN]
28
+ # Example of a correct version string: '0.4.0a3.dev35'
29
+ # __version__ = '1.1'
30
+ # __version__ = '1.2.0b4'
31
+ # __version__ = '1.2.0' # Release
32
+ #__version__ = "1.2.1b3"
33
+ __version__ = "1.2.1"
File without changes
@@ -0,0 +1,337 @@
1
+ #! /usr/bin/env python
2
+ """Benchmark speed of SPARC spectra retrieve with various parameters.
3
+ """
4
+ # EXAMPLES:
5
+ # cd ~/sandbox/sparclclient
6
+ # python3 -m sparcl.benchmarks.benchmarks ~/data/sparc/sids5.list
7
+ # python3 -m sparcl.benchmarks.benchmarks ~/data/sparc/sids644.list
8
+
9
+ # Alice reported 22 minutes on 64K retrieved from specClient (rate=48 spec/sec)
10
+ # slack.spectro: 3/31/2021
11
+
12
+ # Standard Python library
13
+ import argparse
14
+ import logging
15
+ import os
16
+ from pprint import pformat
17
+
18
+ # External packages
19
+ import psutil
20
+
21
+ # Local packages
22
+ from ..client import SparclClient
23
+ from ..utils import tic, toc, here_now
24
+
25
+ # rooturl = 'http://localhost:8030/' #@@@
26
+ rooturl = "http://sparc1.datalab.noirlab.edu:8000/"
27
+
28
+
29
def human_size(num, units=("b", "KB", "MB", "GB", "TB", "PB", "EB")):
    """Return a human readable string representation of NUM.

    Recursively scales NUM down by 1024, consuming one unit label per
    division, until it fits below one unit step.

    Args:
        num: Byte count (int or float).
        units: Sequence of unit labels; units[0] labels the current scale.

    Returns:
        str: e.g. ``"1.4 MB"``.
    """
    # BUG FIX: the threshold was 1024 but the divisor was 1000, mixing
    # binary and decimal scaling so values drifted at each step. Use
    # 1024 consistently. Also stop when only one unit label remains to
    # avoid an IndexError on very large numbers. The default is a tuple
    # to avoid the mutable-default-argument pitfall.
    if num < 1024 or len(units) == 1:
        return f"{num:.1f} {units[0]}"
    return human_size(num / 1024, units[1:])
36
+
37
+
38
+ # with open('/data/sparc/sids5.list') as f:
39
+ # specids = [int(line.strip()) for line in f if not line.startswith('#')]
40
def run_retrieve(specids, columns=None, xfer="p", verbose=True):
    """Retrieve SPECIDS in a single call and gather timing statistics."""
    #!print(f'Retrieving {len(specids):,} spectra')
    psutil.cpu_percent()  # begin interval
    sparcl_client = SparclClient(url=rooturl)
    stats = dict(numcols=len(columns), numspecids=len(specids))
    if verbose:
        print(f"Experiment: {pformat(stats)}")
    tic()
    records = sparcl_client.retrieve(specids, columns=columns, xfer=xfer)
    seconds = toc()
    #!cpu = psutil.cpu_percent(interval=1)
    if verbose:
        print(f"len(specids)={len(specids)} len(data)={len(records)}")
    assert len(specids) == len(records)  # @@@ but some of ingest may have failed
    assert len(records[0]["spectra__coadd__flux"]) > 1000
    stats.update(
        elapsed=seconds,
        retrieved=len(records),
        rate=len(records) / seconds,
        end_smrem=psutil.swap_memory().free,
        end_vmrem=psutil.virtual_memory().available,
        end_cpuload=os.getloadavg()[1],
        end_cpuperc=psutil.cpu_percent(),  # end interval
    )
    return stats
65
+
66
+
67
def run_paged_retrieve(
    specids, columns=None, xfer="p", page=5000, verbose=True, keepall=False
):
    """Do 1 more more PAGE size retrieves to get data for all specids.

    Args:
        specids (list): specids to retrieve.
        columns (list): Column names to fetch for each spectrum.
        xfer (str): Transfer encoding ('p'=pickle, 'j'=json).
        page (int): Maximum number of specids per server round-trip.
        verbose (bool): Print progress diagnostics.
        keepall (bool): Accumulate all retrieved records in memory.

    Returns:
        dict: Timing and client host-load statistics for the whole run.
    """
    print(f"Paged Retrieve of {len(specids):,} spectra")
    psutil.cpu_percent()  # begin interval
    client = SparclClient(url=rooturl)
    result = dict(
        numcols=len(columns), numspecids=len(specids), xfer=xfer, page=page
    )
    if verbose:
        print(f"Experiment: {pformat(result)}")

    data = []
    datacnt = 0
    tic()
    for cnt in range(0, len(specids), page):
        pdata = client.retrieve(
            specids[cnt : cnt + page], columns=columns, xfer=xfer
        )
        datacnt += len(pdata)
        if keepall:
            data.extend(pdata)
    elapsed = toc()

    #! cpu = psutil.cpu_percent(interval=1)
    if verbose:
        print(f"len(specids)={len(specids)} datacnt={datacnt}")
    # assert len(specids) == len(data) # @@@but some ingests may have failed
    #!assert len(data[0]['spectra__coadd__flux']) > 1000 # @@@
    result.update(
        elapsed=elapsed,
        # BUG FIX: 'retrieved' previously reported len(data), which is
        # always 0 unless keepall=True even though records were fetched.
        retrieved=datacnt,
        rate=datacnt / elapsed,
        end_smrem=psutil.swap_memory().free,
        end_vmrem=psutil.virtual_memory().available,
        end_cpuload=os.getloadavg()[1],
        end_cpuperc=psutil.cpu_percent(),  # end interval
    )
    return result
107
+
108
+
109
# flux,loglam,ivar,and_mask,or_mask,wdisp,sky,model
allcols = [
    "flux",
    "loglam",
    "ivar",
    "and_mask",
    "or_mask",
    "wdisp",
    "sky",
    "model",
]

# Each experiment sweeps transfer methods, specid counts, and the
# number of columns requested per spectrum.
experiment_0 = dict(
    xfers=["p"],
    specidcnts=[600, 60],
    numcols=range(1, 3),
)
experiment_1 = dict(
    xfers=["p"],
    specidcnts=[6, 60, 600, 6000, 30000],
    numcols=range(1, 3),
    # numcols=range(1,len(allcols)+1),
)
experiment_2 = dict(
    xfers=["p"],
    specidcnts=[1000, 100, 10],
    numcols=range(1, len(allcols) + 1),
)
experiment_3 = dict(
    xfers=["p"],
    specidcnts=[1000],
    numcols=reversed(range(1, len(allcols) + 1)),
)

experiment_8 = dict(
    xfers=["p"],
    specidcnts=[65000],
    numcols=[1, 2, 8],
)
experiment_9 = dict(
    xfers=["p", "j"],
    specidcnts=sorted({min(7 * 10**x, 65000) for x in range(6)}),
    numcols=range(1, len(allcols) + 1),
)
159
+
160
+
161
def run_trials(allspecids, verbose=True):
    """Run the selected experiment grid over ALLSPECIDS and report."""
    # ex = experiment_9 #@@@
    ex = experiment_8  # @@@

    xfers = ex["xfers"]
    specidcnts = ex["specidcnts"]
    numcols = ex["numcols"]

    klist = ["elapsed", "numcols", "numspecids", "page", "rate", "xfer"]

    trials = []
    for xfer in xfers:
        for ncols in numcols:
            cols = allcols[:ncols]
            for specidcnt in specidcnts:
                specids = allspecids[:specidcnt]
                #!result = run_retrieve(specids, columns=cols, xfer='p')
                result = run_paged_retrieve(specids, columns=cols, xfer="p")
                if verbose:
                    # print(f'Run-Result: {pformat(result)}')
                    reduced = {
                        k: v for k, v in result.items() if k in klist
                    }
                    print(f"Run-Result: {reduced}")
                trials.append(result)
        report(trials, len(allspecids), xfer=xfer)
    return trials
188
+
189
+
190
def report(results, specidcnt, xfer=None, bandwidth=False):
    """Print a human-readable summary of benchmark RESULTS.

    Args:
        results (list): Stat dicts as returned by run_paged_retrieve().
        specidcnt (int): Total number of specids available to the run.
        xfer (str): Transfer method used; 'p' prints "Pickle", anything
            else prints "JSON".
        bandwidth (bool): Bandwidth measurement (currently disabled).

    Returns:
        str: The literal string "Done".
    """
    hostname, now = here_now()
    min1, min5, min15 = os.getloadavg()
    #!smrem = psutil.swap_memory().free
    #!vmrem = psutil.virtual_memory().available
    #!cpuperc = psutil.cpu_percent(interval=1)

    # NOTE(review): if bandwidth=True, dl_speed is never assigned and
    # the print below raises NameError — the speedtest branch is
    # commented out. Confirm bandwidth=True is never used.
    if bandwidth:
        pass
        #! s = speedtest.Speedtest()
        #! ul_speed = s.upload(threads=1)
        #! dl_speed = s.download(threads=1)
    else:
        #! ul_speed = 0
        dl_speed = 0

    #! Upload speed: {human_size(ul_speed)}
    print(f"\nBenchmark run on {hostname} at {now} with {specidcnt} specids.")
    print(
        f"""
    Transfer Method: {"Pickle" if xfer=='p' else "JSON"}
    Download speed: {human_size(dl_speed)}
    """
    )
    # Load Avg: {min5:.1f}
    # (avg num processes running over last 5 minutes)
    # CPU utilization: {cpuperc:.0f}%
    # Swap Mem Avail: {human_size(smrem)}
    # Virtual Mem Avail: {human_size(vmrem)}
    # (Above statistics are for CLIENT.)

    # Table header: one row per trial below.
    #!print(f'Column\tSID\tRate \tAvg \tCPU \tSwap\tVirt')
    #!print(f' Count\tCnt\ts/sec\tLoad\tUtil\t Mem\t Mem')
    #!print(f'------\t---\t-----\t----\t----')
    print(f"Column\tSID\tRate ")
    print(f" Count\tCnt\ts/sec")
    print(f"------\t---\t-----")
    for r in results:
        # str.format(**r) pulls numcols/numspecids/rate from each stats dict.
        print(
            (
                "{numcols}\t"
                "{numspecids}\t"
                "{rate:.0f}\t"
                #!"{end_cpuload:.02f}\t"
                #!"{end_cpuperc:.0f}%\t"
                #!"{smrem}\t"
                #!"{vmrem}\t"
            ).format(**r)
        )
        # smrem=human_size(r['end_smrem']),
        # vmrem=human_size(r['end_vmrem']),
    print(
        """
    LEGEND:
    Rate:: spectra/second
    Transfer method:: Pickle, Json
    Load:: Number of processes in system run queue averaged over last 5 minutes.
    """
    )
    return "Done"
250
+
251
+
252
+ ##############################################################################
253
+
254
+
255
def my_parser():
    """Build the command-line argument parser for the benchmark script.

    Returns:
        argparse.ArgumentParser: Parser accepting a specids file plus
        --cols, --xfer, and --loglevel options.
    """
    parser = argparse.ArgumentParser(
        #!version='1.0.1',
        description="My shiny new python program",
        epilog='EXAMPLE: %(prog)s a b"',
    )
    allcols = [
        "flux",
        "loglam",
        "ivar",
        "and_mask",
        "or_mask",
        "wdisp",
        "sky",
        "model",
    ]
    #!dftcols = 'flux,loglam'
    dftcols = ",".join(allcols)
    parser.add_argument(
        "specids",
        type=argparse.FileType("r"),
        help=("File containing list of " "specobjids. One per line."),
    )
    parser.add_argument(
        "--cols",
        # choices=allcols,
        default=dftcols,
        # BUG FIX: "seperated" -> "separated" in user-facing help text.
        help=(
            f"List of comma separated columns to get. " f'Default="{dftcols}"'
        ),
    )
    parser.add_argument(
        "--xfer",
        default="p",
        help="Mode to use to transfer from Server to Client.",
    )

    parser.add_argument(
        "--loglevel",
        help="Kind of diagnostic output",
        # BUG FIX: "CRTICAL" typo made --loglevel CRITICAL invalid.
        choices=["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG"],
        default="WARNING",
    )
    return parser
299
+
300
+
301
def main():
    """Entry point: parse args, configure logging, load specids, run."""
    parser = my_parser()
    args = parser.parse_args()
    # Close the argparse-opened file handle; we only need its path and
    # will re-open it ourselves below.
    args.specids.close()
    args.specids = args.specids.name

    log_level = getattr(logging, args.loglevel.upper(), None)
    if not isinstance(log_level, int):
        parser.error("Invalid log level: %s" % args.loglevel)
    logging.basicConfig(
        level=log_level,
        format="%(levelname)s %(message)s",
        datefmt="%m-%d %H:%M",
    )
    logging.debug("Debug output is enabled!!!")

    # One integer specid per non-comment line.
    with open(args.specids, "r") as fin:
        specids = [
            int(line.strip()) for line in fin if not line.startswith("#")
        ]
    #! cols = args.cols.split(',')
    # print(f'specids count={len(specids)} columns={cols}')

    # run_retrieve(specids, columns=cols, xfer='p')
    print(f"Starting benchmark on {here_now()}")
    #! all = run_trials(specids)
    print(f"Finished benchmark on {here_now()}")
331
+
332
def foo(x):
    """Unused placeholder; accepts X and returns None."""
    return None
334
+
335
+
336
# Script entry point: parse arguments and run the benchmark.
if __name__ == "__main__":
    main()