sparclclient 1.2.1__py2.py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
sparcl/conf.py ADDED
@@ -0,0 +1,34 @@
+ # Python Standard Library
+ import configparser
+ import os.path
+
+
+ class Conf:
+     """
+     Configuration parameters for `sparclclient`.
+     """
+
+     def __init__(self, conf_file=None):
+         config = configparser.ConfigParser()
+         conf_files = ["~/sparc.ini", "sparcl/sparc.ini"]
+         if conf_file is None:
+             for cf in conf_files:
+                 if os.path.exists(os.path.expanduser(cf)):
+                     config.read(os.path.expanduser(cf))
+         else:
+             # Read an explicitly supplied conf file.
+             config.read(os.path.expanduser(conf_file))
+
+         # The properties below read the "sparc.server" section, so that
+         # is the section required here.
+         if "sparc.server" not in config:
+             raise Exception(
+                 f"Could not find conf file in any of: "
+                 f'{",".join(conf_files)}. '
+                 f"Create one and try again."
+             )
+
+         self.config = config
+
+     @property
+     def server_baseurl(self):
+         return self.config["sparc.server"]["ServerBaseUrl"]
+
+     @property
+     def server_timeout(self):
+         # NOTE: the key name "ServerTimout" (sic) must match the ini file.
+         return self.config["sparc.server"]["ServerTimout"]
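For reference, a minimal sparc.ini that satisfies Conf might look like the sketch below; the section and key names come from the properties above, while the values are placeholders:

    [sparc.server]
    ServerBaseUrl = https://example.org/sparc
    ServerTimout = 120

With such a file at ~/sparc.ini, Conf().server_baseurl returns the configured URL and Conf().server_timeout returns the (string) timeout value.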
sparcl/exceptions.py ADDED
@@ -0,0 +1,141 @@
+ import traceback
+
+
+ def genSparclException(response, verbose=False):
+     """Given the status from Server response.json(), which is a dict,
+     generate a native SPARCL exception suitable for Science programs."""
+
+     content = response.content
+     if verbose:
+         print(f"Exception: response content={content}")
+     status = response.json()
+
+     # As of Python 3.10.0.alpha6, the python "match" statement could be used
+     # instead of if-elif-else.
+     # https://docs.python.org/3.10/whatsnew/3.10.html#pep-634-structural-pattern-matching
+     if status.get("errorCode") == "BADPATH":
+         return BadPath(status.get("errorMessage"))
+     elif status.get("errorCode") == "BADQUERY":
+         return BadQuery(status.get("errorMessage"))
+     elif status.get("errorCode") == "UNKFIELD":
+         return UnknownField(status.get("errorMessage"))
+     elif status.get("errorCode") == "BADCONST":
+         return BadSearchConstraint(status.get("errorMessage"))
+     else:
+         return UnknownServerError(
+             f"{status.get('errorMessage')} [{status.get('errorCode')}]"
+         )
+
+
+ class BaseSparclException(Exception):
+     """Base class for all SPARCL exceptions."""
+
+     error_code = "UNKNOWN"
+     error_message = "<NA>"
+     traceback = None
+
+     def get_subclass_name(self):
+         return self.__class__.__name__
+
+     def __init__(self, error_message, error_code=None):
+         Exception.__init__(self)
+         self.error_message = error_message
+         if error_code:
+             self.error_code = error_code
+         self.traceback = traceback.format_exc()
+
+     def __str__(self):
+         return f"[{self.error_code}] {self.error_message}"
+
+     def to_dict(self):
+         """Convert a SPARCL exception to a python dictionary"""
+         dd = dict(errorMessage=self.error_message, errorCode=self.error_code)
+         if self.traceback is not None:
+             dd["traceback"] = self.traceback
+         return dd
+
+
+ class BadPath(BaseSparclException):
+     """A field path starts with a non-core field."""
+
+     error_code = "BADPATH"
+
+
+ class BadQuery(BaseSparclException):
+     """Bad find constraints."""
+
+     # Matches the server code dispatched in genSparclException.
+     error_code = "BADQUERY"
+
+
+ class BadInclude(BaseSparclException):
+     """Include list contains invalid data field(s)."""
+
+     error_code = "BADINCL"
+
+
+ class UnknownServerError(BaseSparclException):
+     """Client got a status response from the SPARC Server that we do not
+     know how to decode."""
+
+     error_code = "UNKNOWN"
+
+
+ class UnkDr(BaseSparclException):
+     """The Data Release is not known or not supported."""
+
+     error_code = "UNKDR"
+
+
+ class ReadTimeout(BaseSparclException):
+     """The server did not send any data in the allotted amount of time."""
+
+     error_code = "RTIMEOUT"
+
+
+ class UnknownSparcl(BaseSparclException):
+     """Unknown SPARCL error. If this is ever raised (seen in a log),
+     create and use a new BaseSparclException subclass that is more
+     specific."""
+
+     error_code = "UNKSPARC"
+
+
+ class UnknownField(BaseSparclException):
+     """Unknown field name for a record."""
+
+     error_code = "UNKFIELD"
+
+
+ class NoCommonIdField(BaseSparclException):
+     """The field name for the Science id field is not common to all
+     Data Sets."""
+
+     error_code = "IDNOTCOM"
+
+
+ class ServerConnectionError(BaseSparclException):
+     error_code = "SRVCONER"
+
+
+ class BadSearchConstraint(BaseSparclException):
+     # NOTE: genSparclException dispatches on the server code "BADCONST",
+     # but instances constructed there keep this default code.
+     error_code = "BADSCONS"
+
+
+ class NoRecords(BaseSparclException):
+     """Results did not contain any records."""
+
+     error_code = "NORECORD"
+
+
+ class TooManyRecords(BaseSparclException):
+     """Too many records asked for in RETRIEVE."""
+
+     error_code = "TOOMANYR"
+
+
+ class NoIDs(BaseSparclException):
+     """The length of the list of original IDs passed to the reorder
+     method was zero."""
+
+     error_code = "NOIDS"
+
+
+ # error_code values should be no bigger than 8 characters 12345678
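As a usage sketch (not part of the package), genSparclException only needs an object exposing content and json(); the stand-in below is hypothetical:

    from types import SimpleNamespace
    import sparcl.exceptions as ex

    status = {"errorCode": "BADQUERY", "errorMessage": "bad cone search"}
    fake_response = SimpleNamespace(content=b"...", json=lambda: status)

    err = ex.genSparclException(fake_response)
    assert isinstance(err, ex.BadQuery)
    print(err)         # [BADQUERY] bad cone search
    d = err.to_dict()  # errorMessage, errorCode (plus traceback when set)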
sparcl/fields.py ADDED
@@ -0,0 +1,160 @@
+ """Get Field names associated with various SPARCL conditions."""
+ # Python Standard Library
+ from collections import defaultdict
+
+ # External Packages
+ import requests
+
+
+ def validate_fields(datafields):
+     # datafields is simply:
+     #   DataField.objects.all().values(*atts)
+
+     drs = set(df["data_release"] for df in datafields)
+     core = {
+         df["origdp"]: df["newdp"] for df in datafields if df["storage"] == "C"
+     }
+
+     o2n = {
+         dr: {
+             df["origdp"]: df["newdp"]
+             for df in datafields
+             if df["data_release"] == dr
+         }
+         for dr in drs
+     }
+
+     for dr, df in o2n.items():
+         # 1-1 mapping origdp <-> newdp across all DR
+         if len(set(df.values())) != len(df):
+             msg = (
+                 f"Data Release={dr} does not have a one-to-one mapping "
+                 f"between Original and Science field names."
+             )
+             raise Exception(msg)
+
+         acore = defaultdict(list)  # ambiguous core fields (more than one value)
+         for k in core.keys():
+             if df.get(k) != core.get(k):
+                 acore[k].append(df.get(k))
+         if len(acore) > 0:
+             msg = (
+                 f"DataFields do not have the same "
+                 f"Science field name for core values across all Data Sets. "
+                 f"{dict(acore)}"
+             )
+             raise Exception(msg)
+
+     return True
+
+
+ class Fields:  # Derived from a single query
+     """Lookup of Field Names"""
+
+     def __init__(self, apiurl):
+         # [rec, ...]
+         # where rec is a dict containing keys:
+         #   'data_release', 'origdp', 'newdp', 'storage', 'default', 'all'
+         datafields = requests.get(f"{apiurl}/datafields/").json()
+
+         validate_fields(datafields)
+
+         dr_list = set(df["data_release"] for df in datafields)
+
+         self.datafields = datafields
+         # o2n[DR][InternalName] => ScienceName
+         self.o2n = {
+             dr: {
+                 df["origdp"]: df["newdp"]
+                 for df in datafields
+                 if df["data_release"] == dr
+             }
+             for dr in dr_list
+         }
+         # n2o[DR][ScienceName] => InternalName
+         self.n2o = {
+             dr: {
+                 df["newdp"]: df["origdp"]
+                 for df in datafields
+                 if df["data_release"] == dr
+             }
+             for dr in dr_list
+         }
+         self.all_drs = dr_list
+         self.all_fields = set(df["newdp"] for df in datafields)
+
+         # Per DataRelease: get Storage, Default, All for each (user) fieldname
+         # attrs[DR][newdp] => dict[storage, default, all]
+         self.attrs = {
+             dr: {
+                 df["newdp"]: {
+                     "storage": df["storage"],
+                     "default": df["default"],
+                     "all": df["all"],
+                 }
+                 for df in datafields
+                 if df["data_release"] == dr
+             }
+             for dr in dr_list
+         }
+
+     @property
+     def all_datasets(self):
+         return self.all_drs
+
+     def _science_name(self, internal_name, dataset):
+         return self.o2n[dataset].get(internal_name)
+
+     def _internal_name(self, science_name, dataset):
+         #!return self.n2o[dataset][science_name]
+         return self.n2o[dataset].get(science_name)
+
+     def filter_fields(self, attr, dataset_list):
+         fields = set()
+         for dr in dataset_list:
+             for k, v in self.attrs[dr].items():
+                 if v.get(attr):
+                     fields.add(k)
+         return fields
+
+     def default_retrieve_fields(self, dataset_list=None):
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return self.filter_fields("default", dataset_list)
+
+     def all_retrieve_fields(self, dataset_list=None):
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return self.filter_fields("all", dataset_list)
+
+     def common(self, dataset_list=None):
+         """Science field names common to DATASET_LIST (or all datasets
+         if None)."""
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return sorted(
+             set.intersection(
+                 *[set(self.n2o[dr].keys()) for dr in dataset_list]
+             )
+         )
+
+     def common_internal(self, dataset_list=None):
+         """Internal field names common to DATASET_LIST (or all datasets
+         if None)."""
+         if dataset_list is None:
+             dataset_list = self.all_drs
+         return set.intersection(
+             *[set(self.o2n[dr].keys()) for dr in dataset_list]
+         )
+
+     # There is probably an algorithm to partition ELEMENTS into
+     # the _minimum_ number of SETS such that the union of all SETS
+     # contains all ELEMENTS. For now, partition by Data Set (when used).
+     def field_partitions(self, fields):
+         """Partition FIELDS into the DataSets that contain them"""
+         dr_fields = defaultdict(list)
+         for field in fields:
+             for dr in self.all_drs:
+                 if field in self.n2o[dr]:
+                     dr_fields[dr].append(field)
+         return dict(dr_fields)
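A usage sketch, assuming a SPARCL server exposing the /datafields/ endpoint (the URL below is a placeholder):

    from sparcl.fields import Fields

    flds = Fields("https://example.org/sparc")  # placeholder apiurl
    print(flds.all_datasets)                    # e.g. {'BOSS-DR16', ...}
    print(flds.common(["BOSS-DR16"]))           # Science names in one Data Set
    print(flds.default_retrieve_fields())       # fields flagged "default"
    print(flds.field_partitions(["flux"]))      # Data Sets containing 'flux'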
sparcl/gather_2d.py ADDED
@@ -0,0 +1,233 @@
+ """Align or resample spectra related fields across multiple records."""
+ # See client.py for a Doctest example.
+ #
+ # For info about problems with floating point,
+ # See: https://docs.python.org/3/tutorial/floatingpoint.html
+ # Also: https://docs.python.org/3/library/decimal.html#floating-point-notes
+ #
+ from decimal import Decimal
+
+ # External Packages
+ import numpy as np
+
+ # Local Packages
+ import sparcl.client
+
+
+ # Map every wavelength of every record to index (ri, wi)
+ # where
+ #   ri: Record Index
+ #   wi: Window Index (offset of wavelength in WINDOW)
+ #   window: ordered list of wavelengths that includes ALL unique
+ #           wavelengths in all records
+ #! def rec_woffset(records, window):
+ #!     ar = np.ones([len(records), len(window)])
+ #!     for ri, r in enumerate(records):
+ #!         for wl in r.wavelength:
+ #!             try:
+ #!                 wi = window.index(wl)
+ #!             except ValueError:
+ #!                 continue
+ #!             ar[ri, wi] = wl
+ #!     return ar
+
+
+ def _wavelength_offsets(records):
+     # sorted list of wavelengths from ALL records
+     window = sorted(
+         set(records[0].wavelength).union(*[r.wavelength for r in records[1:]])
+     )
+     # offsets[ri] = index into WINDOW
+     offsets = {
+         ri: window.index(rec.wavelength[0]) for ri, rec in enumerate(records)
+     }
+     return (window, offsets)
+
+
+ def _validate_wavelength_alignment(records, window, offsets, precision=None):
+     PLACES = Decimal(10) ** -precision if precision is not None else None
+     # Given an exact wavelength match between the first wl (wavelength) in a
+     # rec and the wl at its offset in WINDOW, ensure all the remaining wls
+     # in the rec match the next N wls of WINDOW.
+     for ri, rec in enumerate(records):
+         for wi, rwl in enumerate(rec.wavelength):  # wi=recWavelengthIndex
+             if precision is None:
+                 recwl = Decimal(rwl)
+             else:
+                 recwl = Decimal(rwl).quantize(PLACES)
+             wwl = window[offsets[ri] + wi]
+             if recwl != wwl:
+                 msg = (
+                     f"The spectra cannot be aligned with the given"
+                     f' "precision" parameter ({precision}).'
+                     f" Try lowering the precision value."
+                 )
+                 raise Exception(msg)
+
+
+ # We want to align a bunch of records by wavelength into a single
+ # 2d numpy array (record vs wavelength). In general, we are not
+ # guaranteed that this is possible -- even if using only records
+ # from a single DataSet. So validate it first.
+ # (If not valid, allowing wavelength slop might help.)
+ def _align_wavelengths(records):
+     window, offsets = _wavelength_offsets(records)
+     _validate_wavelength_alignment(records, window, offsets)
+     ar = np.ones([len(records), len(window)])
+     for ri, r in enumerate(records):
+         for wi, wl in enumerate(r.wavelength):
+             ar[ri, offsets[ri] + wi] = wl  # @@@WRONG!!! We want FLUX
+     return ar
+
+
+ def _tt1(numrecs=20, dr="BOSS-DR16"):
+     client = sparcl.client.SparclClient()
+     found = client.find(constraints=dict(data_release=[dr]), limit=numrecs)
+     got = client.retrieve(found.ids)
+     records = got.records
+     window, offsets = _wavelength_offsets(records)
+     print(f"Built window len={len(window)}; offsets={offsets}")
+     # return records, window, offsets
+     ar = _align_wavelengths(records)
+     return ar
+
+
+ # precision:: number of decimal places
+ # "records" must contain the "wavelength" field.
+ def _wavelength_grid_offsets(records, precision=11):
+     PLACES = Decimal(10) ** -precision
+
+     # set of wavelengths from ALL records, quantized to precision
+     gset = set()  # Grid SET
+     for r in records:
+         gset.update([Decimal(w).quantize(PLACES) for w in r.wavelength])
+     grid = sorted(gset)  # 1D sorted list of wavelengths (bigger than any rec)
+     # offsets[ri] = index into GRID
+     offsets = {
+         ri: grid.index(Decimal(rec.wavelength[0]).quantize(PLACES))
+         for ri, rec in enumerate(records)
+     }
+     return (grid, offsets)
+
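For illustration only (the wavelength values are made up): the Decimal quantization used above is what lets wavelengths that differ only in floating-point noise land on the same grid entry:

    from decimal import Decimal

    PLACES = Decimal(10) ** -7  # precision=7, as in align_records below
    a = Decimal(3650.0000000301).quantize(PLACES)
    b = Decimal(3650.0000000299).quantize(PLACES)
    assert a == b == Decimal("3650.0000000")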
+
+
+ # RETURN a 2D numpy array of FLUX values aligned to the wavelength GRID.
+ # GRID is generally wider than the flux of a single record. Pad with NaN.
+ def _flux_grid(records, grid, offsets, precision=None):
+     _validate_wavelength_alignment(records, grid, offsets, precision=precision)
+     ar = np.full([len(records), len(grid)], np.nan)
+     for ri, r in enumerate(records):
+         for fi, flux in enumerate(r.flux):
+             ar[ri, offsets[ri] + fi] = flux
+     return ar
+
+
+ # RETURN 2D nparray(records, wavelengthGrid) = fieldValue
+ def _field_grid(records, fieldName, grid, offsets, precision=None):
+     ar = np.full([len(records), len(grid)], np.nan)
+     for ri, r in enumerate(records):
+         for fi, fieldValue in enumerate(r[fieldName]):
+             ar[ri, offsets[ri] + fi] = fieldValue
+     return ar  # (records, wavelengthGrid)
+
+
+ # RETURN 2D nparray(fields, wavelengthGrid) = fieldValue
+ #! def rec_grid(rec, fields, grid, offsets, precision=None):
+ #!     ar = np.full([len(fields), len(grid)], np.nan)
+ #!     ri = 0
+ #!     for fi, fieldValue in enumerate(rec[fieldName]):
+ #!         ar[ri, offsets[ri] + fi] = fieldValue
+ #!     return ar  # (fields, wavelengthGrid)
+
+
+ # Align flux from records into one array using quantization.
+ #! def flux_records(records, precision=None):
+ #!     grid, offsets = _wavelength_grid_offsets(records, precision=precision)
+ #!     ar = _flux_grid(records, grid, offsets, precision=precision)
+ #!     return ar, np.array([float(x) for x in grid])
+
+
+ def _validate_spectra_fields(records, fields):
+     # Stub: does not yet validate anything.
+     #! spectra_fields = [
+     #!     client.fields.n2o["BOSS-DR16"][k]
+     #!     for k, v in client.fields.attrs["BOSS-DR16"].items()
+     #!     if v["storage"] == "S"
+     #! ]
+     [k for k in records[0].keys() if not k.startswith("_")]
+
+
+ # TOP level: intended for access from a Jupyter NOTEBOOK.
+ # Align spectra related fields from records into one array using quantization.
+ def align_records(records, fields=["flux", "wavelength"], precision=7):
+     """Align given spectra-type fields to a common wavelength grid.
+
+     Args:
+         records (list): List of dictionaries.
+             The keys for all these dictionaries are Science Field Names.
+
+         fields (:obj:`list`, optional): List of Science Field Names of
+             spectra related fields to align and include in the results.
+             DEFAULT=['flux', 'wavelength']
+
+         precision (:obj:`int`, optional): Number of decimal places to use
+             for quantizing wavelengths into a grid.
+             DEFAULT=7
+
+     Returns:
+         tuple containing:
+         - ar_dict(dict): Dictionary of 2D numpy arrays keyed by Field Name.
+           Each array has shape: (numRecs, numGridWavelengths)
+         - grid(ndarray): 1D numpy array containing wavelength values.
+
+     Example:
+         >>> client = sparcl.client.SparclClient()
+         >>> specflds = ['wavelength', 'model']
+         >>> cons = {"data_release": ['BOSS-DR16']}
+         >>> found = client.find(constraints=cons, limit=21)
+         >>> got = client.retrieve(found.ids, include=specflds)
+         >>> ar_dict, grid = align_records(got.records, fields=specflds)
+         >>> ar_dict['model'].shape
+         (21, 4666)
+
+     """
+     # Report Garbage In
+     if "wavelength" not in fields:
+         msg = (
+             f'You must provide "wavelength" in the list provided'
+             f' in the "fields" parameter. Got: {fields}'
+         )
+         raise Exception(msg)
+     if "wavelength" not in records[0]:
+         msg = (
+             f'Records must contain the "wavelength" field.'
+             f" The first record contains fields: {sorted(records[0].keys())}"
+         )
+         raise Exception(msg)
+
+     #! _validate_spectra_fields(records, fields)
+     grid, offsets = _wavelength_grid_offsets(records, precision=precision)
+     _validate_wavelength_alignment(records, grid, offsets, precision=precision)
+
+     # One slice for each field; each slice is a 2d array (record, wavelength).
+     adict = dict()
+     for fld in fields:
+         ar = _field_grid(records, fld, grid, offsets, precision=None)
+         adict[fld] = ar
+
+     return adict, np.array([float(x) for x in grid])
+
+
+ # with np.printoptions(threshold=np.inf, linewidth=210,
+ #     formatter=dict(float=lambda v: f'{v: >7.3f}')): print(ar.T)  # noqa: E501
+
+ if __name__ == "__main__":
+     import doctest
+
+     doctest.testmod()
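Because align_records pads with NaN wherever a record does not cover the full grid, downstream math on the returned arrays should be NaN-aware. A small self-contained sketch (synthetic values, not SPARCL data):

    import numpy as np

    # Stand-in for ar_dict['flux']: 2 records on a 4-point grid, NaN-padded.
    flux = np.array([[1.0, 2.0, 3.0, np.nan],
                     [np.nan, 2.5, 3.5, 4.5]])
    mean_flux = np.nanmean(flux, axis=0)                  # per-grid-point mean
    coverage = np.count_nonzero(~np.isnan(flux), axis=0)  # records per point
    print(mean_flux)   # [1.   2.25 3.25 4.5 ]
    print(coverage)    # [1 2 2 1]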