timesat_cli-1.4.3-py3-none-any.whl

timesat_cli/__init__.py ADDED
@@ -0,0 +1,8 @@
+ """
+ TIMESAT CLI package.
+
+ This package provides a Python interface and CLI wrapper for running TIMESAT
+ processing pipelines.
+ """
+
+
timesat_cli/__main__.py ADDED
@@ -0,0 +1,93 @@
+ # src/timesat_cli/__main__.py
+ # Windows: set PYTHONPATH=src
+ # Windows: python -m timesat_cli settings.json -t 12
+
+ import argparse
+ import os
+ import sys
+
+
+ def _platform() -> str:
+     if sys.platform.startswith("win"):
+         return "windows"
+     if sys.platform == "darwin":
+         return "mac"
+     return "linux"
+
+
+ def _validate_threads(value: int | None) -> int | None:
+     """
+     None -> not provided (do not override config)
+     >0   -> use exactly that many threads
+     0    -> use all logical CPUs
+     """
+     if value is None:
+         return None
+
+     if not isinstance(value, int):
+         raise argparse.ArgumentTypeError("threads must be an integer")
+
+     if value < 0:
+         raise argparse.ArgumentTypeError("threads must be >= 0")
+
+     cpu = os.cpu_count() or 1
+
+     if value == 0:
+         return cpu
+
+     if value > cpu * 4:
+         # Protect against accidentally huge numbers; adjust this policy if you prefer.
+         raise argparse.ArgumentTypeError(
+             f"threads={value} is too large for this machine (cpu_count={cpu})."
+         )
+
+     return value
+
+
+ def _set_thread_env(threads: int, plat: str) -> None:
+     """
+     Set environment variables BEFORE importing Fortran / NumPy / MKL code.
+     Uses slightly different defaults by platform.
+     """
+     t = str(int(threads))
+
+     # Always safe / common:
+     os.environ["OMP_NUM_THREADS"] = t
+     os.environ.setdefault("OPENBLAS_NUM_THREADS", t)
+     os.environ.setdefault("MKL_NUM_THREADS", t)
+     os.environ.setdefault("NUMEXPR_NUM_THREADS", t)
+
+     # Intel OpenMP runtime knobs (most relevant on Windows; harmless elsewhere)
+     if plat == "windows":
+         os.environ.setdefault("KMP_NUM_THREADS", t)
+         os.environ.setdefault("OMP_DYNAMIC", "FALSE")  # avoid auto-reducing threads
+
+     # Optional: if you see odd scheduling/perf, you can try enabling one:
+     # os.environ.setdefault("KMP_AFFINITY", "granularity=fine,compact,1,0")
+     # os.environ.setdefault("KMP_BLOCKTIME", "0")
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Run TIMESAT processing pipeline.")
+     parser.add_argument("settings_json", help="Path to the JSON configuration file.")
+     parser.add_argument(
+         "-t", "--threads",
+         type=int,
+         default=None,
+         help="Number of threads. Use 0 to mean 'all CPUs'.",
+     )
+     args = parser.parse_args()
+
+     plat = _platform()
+     threads = _validate_threads(args.threads)
+
+     # IMPORTANT: set env vars before importing the processing / Fortran extension
+     if threads is not None:
+         _set_thread_env(threads, plat)
+
+     from .processing import run
+     run(args.settings_json)
+
+
+ if __name__ == "__main__":
+     main()
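
Because main() defers `from .processing import run` until after _set_thread_env(), the OpenMP/BLAS variables are in place before any NumPy or Fortran code loads. A minimal sketch of the helpers above (import path taken from the header comment; the asserts only restate the documented behavior):

    import os

    from timesat_cli.__main__ import _platform, _set_thread_env, _validate_threads

    assert _validate_threads(None) is None                # leave config untouched
    assert _validate_threads(0) == (os.cpu_count() or 1)  # 0 -> all logical CPUs

    _set_thread_env(4, _platform())
    print(os.environ["OMP_NUM_THREADS"])  # "4"
    print(os.environ["MKL_NUM_THREADS"])  # "4", unless the caller set it already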
timesat_cli/config.py ADDED
@@ -0,0 +1,160 @@
+ from __future__ import annotations
+ import json
+ from dataclasses import dataclass
+ from typing import List, Sequence, Tuple
+ import numpy as np
+
+
+ @dataclass
+ class ClassParams:
+     landuse: int
+     p_fitmethod: int
+     p_smooth: float
+     p_nenvi: int
+     p_wfactnum: float
+     p_startmethod: int
+     p_startcutoff: Tuple[float, float]
+     p_low_percentile: float
+     p_fillbase: int
+     p_seasonmethod: int
+     p_seapar: float
+
+
+ @dataclass
+ class Settings:
+     s3env: str
+     image_file_list: str
+     quality_file_list: str
+     tv_list: str
+     lc_file: str
+     outputfolder: str
+     imwindow: Sequence[int]
+
+     p_band_id: int
+     p_ignoreday: int
+     p_ylu: np.ndarray
+     p_a: List[List[float]]
+     p_st_timestep: int
+     p_nodata: float
+     p_davailwin: int
+     p_outlier: int
+     p_printflag: int
+     max_memory_gb: float
+     scale: float
+     offset: float
+     p_hrvppformat: int
+     p_nclasses: int
+     classes: List[ClassParams]
+     outputvariables: int
+
+
+ @dataclass
+ class Config:
+     settings: Settings
+
+
+ def _as_array(value, dtype=float, fortran=False):
+     arr = np.array(value, dtype=dtype)
+     if fortran:
+         arr = np.asfortranarray(arr)
+     return arr
+
+
+ def load_config(jsfile: str) -> Config:
+     with open(jsfile, "r") as f:
+         data = json.load(f)
+
+     s = data["settings"]
+     nclasses = int(s["p_nclasses"]["value"])
+
+     classes: List[ClassParams] = []
+     for i in range(nclasses):
+         k = f"class{i+1}"
+         c = data[k]
+         classes.append(
+             ClassParams(
+                 landuse=int(c["landuse"]["value"]),
+                 p_fitmethod=int(c["p_fitmethod"]["value"]),
+                 p_smooth=float(_as_array(c["p_smooth"]["value"], dtype="double")),
+                 p_nenvi=int(c["p_nenvi"]["value"]),
+                 p_wfactnum=float(_as_array(c["p_wfactnum"]["value"], dtype="double")),
+                 p_startmethod=int(c["p_startmethod"]["value"]),
+                 p_startcutoff=tuple(_as_array(c["p_startcutoff"]["value"], dtype="double", fortran=True)),
+                 p_low_percentile=float(_as_array(c["p_low_percentile"]["value"], dtype="double")),
+                 p_fillbase=int(c["p_fillbase"]["value"]),
+                 p_seasonmethod=int(c["p_seasonmethod"]["value"]),
+                 p_seapar=float(_as_array(c["p_seapar"]["value"], dtype="double")),
+             )
+         )
+
+     settings = Settings(
+         s3env=s["s3env"]["value"],
+         image_file_list=s["image_file_list"]["value"],
+         quality_file_list=s["quality_file_list"]["value"],
+         tv_list=s["tv_list"]["value"],
+         lc_file=s["lc_file"]["value"],
+         outputfolder=s["outputfolder"]["value"],
+         imwindow=s["imwindow"]["value"],
+         p_band_id=int(s["p_band_id"]["value"]),
+         p_ignoreday=int(s["p_ignoreday"]["value"]),
+         p_ylu=_as_array(s["p_ylu"]["value"], dtype="double", fortran=True),
+         p_a=s["p_a"]["value"],
+         p_st_timestep=int(s["p_st_timestep"]["value"]),
+         p_nodata=float(s["p_nodata"]["value"]),
+         p_davailwin=int(s["p_davailwin"]["value"]),
+         p_outlier=int(s["p_outlier"]["value"]),
+         p_printflag=int(s["p_printflag"]["value"]),
+         max_memory_gb=float(s["max_memory_gb"]["value"]),
+         scale=float(s["scale"]["value"]),
+         offset=float(s["offset"]["value"]),
+         p_hrvppformat=int(s["p_hrvppformat"]["value"]),
+         outputvariables=int(s["outputvariables"]["value"]),
+         p_nclasses=nclasses,
+         classes=classes,
+     )
+
+     return Config(settings=settings)
+
+
+ def build_param_array(
+     s,
+     attr: str,
+     dtype,
+     size: int = 255,
+     shape: Tuple[int, ...] | None = None,
+     fortran_2d: bool = False,
+ ):
+     """
+     Build a parameter array for TIMESAT class settings.
+
+     Parameters
+     ----------
+     s : object
+         Settings container with a `classes` iterable.
+     attr : str
+         Attribute on each class object in `s.classes` (e.g., 'p_smooth').
+     dtype : numpy dtype or dtype string (e.g., 'uint8', 'double').
+     size : int
+         Length of the first dimension (TIMESAT expects 255).
+     shape : tuple[int, ...] | None
+         Extra trailing shape for per-class vectors (e.g., (2,) for p_startcutoff).
+     fortran_2d : bool
+         If True and `shape` is (n,) with n > 1, allocate (size, n) with order='F' to mirror the legacy layout.
+
+     Returns
+     -------
+     np.ndarray
+         Filled parameter array.
+     """
+     if shape is None:
+         arr = np.zeros(size, dtype=dtype)
+         for i, c in enumerate(s.classes):
+             arr[i] = getattr(c, attr)
+         return arr
+
+     full_shape = (size, *shape)
+     order = 'F' if fortran_2d and len(shape) == 1 and shape[0] > 1 else 'C'
+     arr = np.zeros(full_shape, dtype=dtype, order=order)
+     for i, c in enumerate(s.classes):
+         arr[i, ...] = getattr(c, attr)
+     return arr
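
As an illustration of build_param_array, the sketch below expands a single-class configuration into the fixed 255-slot arrays that the Fortran core expects. The Cfg container is a hypothetical stand-in for a loaded Settings object:

    from timesat_cli.config import ClassParams, build_param_array

    class Cfg:  # hypothetical stand-in; only the `classes` attribute matters
        classes = [
            ClassParams(landuse=1, p_fitmethod=2, p_smooth=1000.0, p_nenvi=3,
                        p_wfactnum=2.0, p_startmethod=1, p_startcutoff=(0.5, 0.5),
                        p_low_percentile=5.0, p_fillbase=0, p_seasonmethod=1,
                        p_seapar=1.0),
        ]

    fit = build_param_array(Cfg, "p_fitmethod", dtype="uint8")   # shape (255,)
    cut = build_param_array(Cfg, "p_startcutoff", dtype="double",
                            shape=(2,), fortran_2d=True)         # shape (255, 2), order='F'
    print(fit[0], cut[0])  # 2 [0.5 0.5]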
@@ -0,0 +1,62 @@
+ from __future__ import annotations
+ import os
+ from dotenv import load_dotenv
+ import boto3
+ from botocore.config import Config
+
+ __all__ = ["load_s3_config", "build_rasterio_s3_opts", "to_vsis3_paths"]
+
+ def load_s3_config() -> dict:
+     """
+     Load and validate S3 / CloudFerro configuration from environment variables.
+     Returns a dict with the validated values.
+     """
+     load_dotenv()  # default path
+
+     config = {
+         "AWS_ACCESS_KEY_ID": os.getenv("AWS_ACCESS_KEY_ID"),
+         "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
+         "AWS_SESSION_TOKEN": os.getenv("AWS_SESSION_TOKEN"),  # optional
+         "S3_BUCKET": os.getenv("S3_BUCKET"),
+         "ENDPOINT_URL": os.getenv("ENDPOINT_URL"),
+     }
+
+     required = [
+         config["AWS_ACCESS_KEY_ID"],
+         config["AWS_SECRET_ACCESS_KEY"],
+         config["S3_BUCKET"],
+         config["ENDPOINT_URL"],
+     ]
+
+     if not all(required):
+         raise RuntimeError(
+             "Missing required environment variables. "
+             "Check AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, "
+             "S3_BUCKET, ENDPOINT_URL."
+         )
+
+     return config
+
+
+ def build_rasterio_s3_opts(cfg: dict):
+     """
+     Return a boto3 S3 client configured for the endpoint in `cfg`.
+     (The name is historical; this returns a client, not a dict of rasterio options.)
+     """
+     return boto3.client(
+         "s3",
+         endpoint_url=cfg["ENDPOINT_URL"],  # your S3-compatible endpoint
+         aws_access_key_id=cfg["AWS_ACCESS_KEY_ID"],
+         aws_secret_access_key=cfg["AWS_SECRET_ACCESS_KEY"],
+         aws_session_token=cfg.get("AWS_SESSION_TOKEN"),
+         config=Config(signature_version="s3v4", s3={"addressing_style": "path"}),
+     )
+
+
+ def to_vsis3_paths(s3, bucket: str, key: str, expires: int = 3600) -> str:
+     """Return a presigned GET URL for `bucket`/`key`, valid for `expires` seconds."""
+     return s3.generate_presigned_url(
+         "get_object",
+         Params={"Bucket": bucket, "Key": key},
+         ExpiresIn=expires,
+     )
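
Together, the three helpers cover the usual flow: load and validate credentials, build the client, and presign a GET URL. A sketch under assumptions (this module's file name is not shown in the diff, so the functions are assumed to be in scope; the object key is a placeholder):

    cfg = load_s3_config()            # raises RuntimeError if variables are missing
    s3 = build_rasterio_s3_opts(cfg)  # a boto3 S3 client, despite the name
    url = to_vsis3_paths(s3, cfg["S3_BUCKET"], "path/to/raster.tif")
    print(url)                        # presigned HTTPS URL, valid 3600 s by default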
timesat_cli/csvutils.py ADDED
@@ -0,0 +1,93 @@
+ # csvutils.py
+ from __future__ import annotations
+ from typing import Tuple, Iterable, Optional
+ import os
+ import numpy as np
+ import pandas as pd
+ import datetime as dt
+
+ __all__ = ["read_timeseries_csv", "write_timesat_csv_outputs"]
+
+ def _parse_time_column(col: Iterable[str | int]) -> np.ndarray:
+     """
+     Accepts YYYYDOY (e.g., 2020123), YYYYMMDD (e.g., 20200123) or ISO 'YYYY-MM-DD'.
+     Returns a uint32 vector in YYYYDOY.
+     """
+     out = []
+     for v in col:
+         s = str(v)
+         if len(s) == 7:  # YYYYDOY
+             # will raise if invalid
+             dt.datetime.strptime(s, "%Y%j")
+             out.append(int(s))
+         elif len(s) == 8 and s.isdigit():  # YYYYMMDD
+             d = dt.datetime.strptime(s, "%Y%m%d")
+             out.append(int(f"{d.year}{d.timetuple().tm_yday:03d}"))
+         else:  # try ISO
+             try:
+                 d = dt.datetime.strptime(s, "%Y-%m-%d")
+                 out.append(int(f"{d.year}{d.timetuple().tm_yday:03d}"))
+             except Exception as e:
+                 raise ValueError(f"Unrecognized date format: {s}") from e
+     return np.array(out, dtype="uint32")
+
+ def read_timeseries_csv(path: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """
+     Read a per-site (single pixel) time series CSV.
+
+     Expected columns:
+       - 'time' : YYYYDOY or YYYYMMDD or YYYY-MM-DD
+       - 'vi'   : vegetation index values (float)
+       - 'qa'   : optional; quality or weights (float/int). If missing, set to 1.
+       - 'lc'   : optional; land cover code (int); accepted but currently unused here.
+
+     Returns:
+       vi         : array shaped (1, 1, T)
+       qa         : array shaped (1, 1, T)
+       timevector : 1-D uint32 YYYYDOY of length T
+     """
+     df = pd.read_csv(path)
+     if "time" not in df or "vi" not in df:
+         raise ValueError("CSV must contain at least 'time' and 'vi' columns.")
+     timevector = _parse_time_column(df["time"])
+     vi = df["vi"].to_numpy(dtype="float64")
+     qa = df["qa"].to_numpy(dtype="float64") if "qa" in df else np.ones_like(vi, dtype="float64")
+     # shape to (y=1, x=1, z=T)
+     vi = vi.reshape(1, 1, -1, order="F")
+     qa = qa.reshape(1, 1, -1, order="F")
+     return vi, qa, timevector
+
+ def write_timesat_csv_outputs(
+     out_folder: str,
+     timevector_out: np.ndarray,   # p_outindex dates in YYYYDOY
+     yfit: np.ndarray,             # shape (T_out,) for a single site
+     vpp: Optional[np.ndarray],    # shape (13*2*yr,) flattened for a single site
+     nseason: Optional[int],
+ ) -> None:
+     """
+     Writes up to three CSVs:
+       - yfit.csv    : columns [time (YYYYDOY), yfit]
+       - vpp.csv     : 13*2*yr parameters as columns VPP_1 ... VPP_N (skipped if vpp is None)
+       - nseason.csv : single row with nseason (skipped if nseason is None)
+     """
+     os.makedirs(out_folder, exist_ok=True)
+
+     # yfit
+     yfit_df = pd.DataFrame({
+         "time": timevector_out.astype("uint32"),
+         "yfit": yfit.astype("float64"),
+     })
+     yfit_df.to_csv(os.path.join(out_folder, "yfit.csv"), index=False)
+
+     # vpp
+     if vpp is not None:
+         vpp = vpp.ravel(order="F").astype("float64")
+         cols = [f"VPP_{i+1}" for i in range(vpp.size)]
+         vpp_df = pd.DataFrame([vpp], columns=cols)
+         vpp_df.to_csv(os.path.join(out_folder, "vpp.csv"), index=False)
+
+     # nseason
+     if nseason is not None:
+         pd.DataFrame({"nseason": [int(nseason)]}).to_csv(
+             os.path.join(out_folder, "nseason.csv"), index=False
+         )
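
A quick round trip through read_timeseries_csv, using a small hypothetical input file, shows the expected shapes and the YYYYDOY encoding:

    import pandas as pd

    from timesat_cli.csvutils import read_timeseries_csv

    pd.DataFrame({
        "time": ["2020-01-01", "2020-01-11", "2020-01-21"],
        "vi":   [0.21, 0.24, 0.30],
    }).to_csv("site.csv", index=False)

    vi, qa, tv = read_timeseries_csv("site.csv")
    print(vi.shape, qa.shape)  # (1, 1, 3) (1, 1, 3)
    print(tv)                  # [2020001 2020011 2020021], uint32 YYYYDOY
    # 'qa' was absent, so it defaults to all ones.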
@@ -0,0 +1,118 @@
+ """
+ Utility functions for handling date operations in TIMESAT processing.
+ """
+
+ from __future__ import annotations
+
+ import datetime
+ import numpy as np
+
+ __all__ = ["date_with_ignored_day", "generate_output_timeseries_dates"]
+
+
+ def is_leap_year(y: int) -> bool:
+     """
+     Return True if year y is a Gregorian leap year, False otherwise.
+     """
+     return (y % 4 == 0 and y % 100 != 0) or (y % 400 == 0)
+
+
+ def date_with_ignored_day(yrstart: int, i_tv: int, p_ignoreday: int) -> datetime.date:
+     """
+     Convert a synthetic TIMESAT time index (1-based, assuming 365 days/year)
+     into a real calendar date while skipping one day in leap years.
+     """
+
+     # Locate the index in the synthetic 365-day calendar
+     i = int(i_tv)
+     year_offset, doy_365 = divmod(i - 1, 365)
+     doy_365 += 1
+     year = yrstart + year_offset
+
+     jan1 = datetime.date(year, 1, 1)
+
+     if is_leap_year(year):
+         if not (1 <= p_ignoreday <= 366):
+             raise ValueError("p_ignoreday must be in [1, 366] for leap years")
+
+         if p_ignoreday == 1:
+             real_ordinal = doy_365 + 1
+         elif p_ignoreday == 366:
+             real_ordinal = doy_365
+         else:
+             real_ordinal = doy_365 if doy_365 < p_ignoreday else doy_365 + 1
+     else:
+         real_ordinal = doy_365
+
+     return jan1 + datetime.timedelta(days=real_ordinal - 1)
+
+
+ def build_monthly_sample_indices(yrstart: int, yr: int) -> np.ndarray:
+     """
+     Build a synthetic time index (1-based) for sampling the 1st, 11th, and 21st
+     of each month across multiple years.
+
+     The synthetic timeline always uses 365 days per year.
+     In leap years we:
+       - keep Feb 29
+       - drop Dec 31
+     so that each year still has 365 synthetic days.
+
+     Parameters
+     ----------
+     yrstart : int
+         Starting year of the period.
+
+     yr : int
+         Number of years to include.
+
+     Returns
+     -------
+     np.ndarray
+         A 1-D array of indices into the synthetic timeline (1-based).
+     """
+
+     indices: list[int] = []
+     year_offset = 0  # offset of each synthetic year start (0, 365, 730, ...)
+
+     for year in range(yrstart, yrstart + yr):
+         if is_leap_year(year):
+             # Include Feb 29, drop Dec 31
+             days_in_month = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 30]
+         else:
+             days_in_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+         cum = 0  # cumulative day count within the current year
+
+         for dim in days_in_month:
+             for d in (1, 11, 21):
+                 if d <= dim:
+                     indices.append(year_offset + cum + d)
+             cum += dim
+
+         year_offset += 365
+
+     return np.array(indices, dtype=int)
+
+
+ def generate_output_timeseries_dates(p_st_timestep, yr, yrstart) -> tuple[np.ndarray, int]:
+     p_st_timestep = int(p_st_timestep)
+
+     if p_st_timestep > 0:
+         p_outindex = np.arange(1, yr * 365 + 1)[::p_st_timestep]
+     elif p_st_timestep < 0:
+         p_outindex = build_monthly_sample_indices(yrstart, yr)
+     else:  # p_st_timestep == 0
+         p_outindex = np.arange(1, yr * 365 + 1)[::9999]  # step > series length: only the first point
+
+     # HRVPP2 timestep: delete the first and last year from p_outindex
+     if p_st_timestep == -1:
+         first_year_end = 365
+         last_year_start = (yr - 1) * 365 + 1
+
+         # keep only indices that are NOT in year 1 and NOT in the last year
+         p_outindex = p_outindex[(p_outindex > first_year_end) & (p_outindex < last_year_start)]
+
+     p_outindex_num = len(p_outindex)
+
+     return p_outindex, p_outindex_num
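
To make the synthetic-calendar bookkeeping concrete: in a leap year with p_ignoreday=366, Feb 29 keeps its ordinal and Dec 31 is the day that disappears. A sketch (the module path is assumed, since this file's name is not shown in the diff):

    from timesat_cli.dateutils import (
        date_with_ignored_day,
        generate_output_timeseries_dates,
    )

    print(date_with_ignored_day(2020, 60, 366))   # 2020-02-29: Feb 29 is kept
    print(date_with_ignored_day(2020, 365, 366))  # 2020-12-30: Dec 31 is dropped

    idx, n = generate_output_timeseries_dates(10, yr=2, yrstart=2020)
    print(n, idx[:4])  # 73 [ 1 11 21 31] -- every 10th synthetic day over 2 years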
timesat_cli/fsutils.py ADDED
@@ -0,0 +1,64 @@
+ from __future__ import annotations
+ import os
+ import math
+
+ __all__ = ["create_output_folders", "close_all"]
+
+
+ def create_output_folders(outfolder: str) -> tuple[str, str]:
+     vpp_folder = os.path.join(outfolder, "VPP")
+     st_folder = os.path.join(outfolder, "ST")
+     os.makedirs(vpp_folder, exist_ok=True)
+     os.makedirs(st_folder, exist_ok=True)
+     return st_folder, vpp_folder
+
+
+ def memory_plan(
+     dx: int,
+     dy: int,
+     z: int,
+     p_outindex_num: int,
+     yr: int,
+     max_memory_gb: float,
+ ) -> tuple[int, int]:
+     num_layers = (
+         2 * z                  # VI + QA
+         + 2 * p_outindex_num   # yfit + yfit QA
+         + 2 * 13 * 2 * yr      # VPP + VPP QA
+         + yr                   # nseason
+     )
+
+     bytes_per = 8   # float64
+     safety = 0.6    # use at most 60% of the budget, leaving 40% headroom for overhead
+     max_bytes = max_memory_gb * (2 ** 30) * safety
+
+     dy_max = max_bytes / (dx * num_layers * bytes_per) if num_layers > 0 else dy
+     y_slice_size = int(min(math.floor(dy_max), dy)) if dy_max > 0 else dy
+     y_slice_size = max(1, y_slice_size)
+     num_block = int(math.ceil(dy / y_slice_size))
+     return y_slice_size, num_block
+
+
+ def close_all(*items):
+     """
+     Close datasets or other objects that have a .close() method.
+     Accepts individual objects and iterables (lists/tuples/etc.).
+     Ignores None safely.
+     """
+     for obj in items:
+         if obj is None:
+             continue
+
+         # If it's an iterable of objects (e.g. a list of datasets)
+         if isinstance(obj, (list, tuple, set)):
+             for x in obj:
+                 if x is None:
+                     continue
+                 close = getattr(x, "close", None)
+                 if callable(close):
+                     close()
+         else:
+             # Single object
+             close = getattr(obj, "close", None)
+             if callable(close):
+                 close()
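
The row-blocking arithmetic in memory_plan is easy to check by hand. For a hypothetical 10000 x 10000 scene with 100 input dates, 36 output points, and 2 years, num_layers = 2*100 + 2*36 + 2*13*2*2 + 2 = 378; 60% of an 8 GiB budget divided by 10000 * 378 * 8 bytes per row block gives about 170 rows per slice:

    from timesat_cli.fsutils import memory_plan

    rows, blocks = memory_plan(dx=10000, dy=10000, z=100,
                               p_outindex_num=36, yr=2, max_memory_gb=8.0)
    print(rows, blocks)  # 170 59 -> 170-row slices, processed in 59 blocks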