timesat_cli-1.4.3-py3-none-any.whl

timesat_cli/__init__.py ADDED
@@ -0,0 +1,8 @@
+ """
+ TIMESAT CLI package.
+
+ This package provides a Python interface and CLI wrapper for running TIMESAT
+ processing pipelines.
+ """
+
+
timesat_cli/__main__.py ADDED
@@ -0,0 +1,93 @@
+ # src/timesat_cli/__main__.py
+ # Windows: set PYTHONPATH=src
+ # Windows: python -m timesat_cli settings.json -t 12
+
+ import argparse
+ import os
+ import sys
+
+
+ def _platform() -> str:
+     if sys.platform.startswith("win"):
+         return "windows"
+     if sys.platform == "darwin":
+         return "mac"
+     return "linux"
+
+
+ def _validate_threads(value: int | None) -> int | None:
+     """
+     None -> not provided (do not override config)
+     >0   -> use exactly that many threads
+     0    -> use all logical CPUs
+     """
+     if value is None:
+         return None
+
+     if not isinstance(value, int):
+         raise argparse.ArgumentTypeError("threads must be an integer")
+
+     if value < 0:
+         raise argparse.ArgumentTypeError("threads must be >= 0")
+
+     cpu = os.cpu_count() or 1
+
+     if value == 0:
+         return cpu
+
+     if value > cpu * 4:
+         # Protect against accidentally huge numbers; adjust this policy if you prefer.
+         raise argparse.ArgumentTypeError(
+             f"threads={value} is too large for this machine (cpu_count={cpu})."
+         )
+
+     return value
+
+
+ def _set_thread_env(threads: int, plat: str) -> None:
+     """
+     Set environment variables BEFORE importing Fortran / NumPy / MKL code.
+     Uses slightly different defaults by platform.
+     """
+     t = str(int(threads))
+
+     # Always safe / common:
+     os.environ["OMP_NUM_THREADS"] = t
+     os.environ.setdefault("OPENBLAS_NUM_THREADS", t)
+     os.environ.setdefault("MKL_NUM_THREADS", t)
+     os.environ.setdefault("NUMEXPR_NUM_THREADS", t)
+
+     # Intel OpenMP runtime knobs (most relevant on Windows; harmless elsewhere)
+     if plat == "windows":
+         os.environ.setdefault("KMP_NUM_THREADS", t)
+         os.environ.setdefault("OMP_DYNAMIC", "FALSE")  # avoid auto-reducing threads
+
+     # Optional: if you see odd scheduling/perf, you can try enabling one:
+     # os.environ.setdefault("KMP_AFFINITY", "granularity=fine,compact,1,0")
+     # os.environ.setdefault("KMP_BLOCKTIME", "0")
+
+
+ def main() -> None:
+     parser = argparse.ArgumentParser(description="Run TIMESAT processing pipeline.")
+     parser.add_argument("settings_json", help="Path to the JSON configuration file.")
+     parser.add_argument(
+         "-t", "--threads",
+         type=int,
+         default=None,
+         help="Number of threads. Use 0 to mean 'all CPUs'.",
+     )
+     args = parser.parse_args()
+
+     plat = _platform()
+     threads = _validate_threads(args.threads)
+
+     # IMPORTANT: set env vars before importing the processing / Fortran extension
+     if threads is not None:
+         _set_thread_env(threads, plat)
+
+     from .processing import run
+     run(args.settings_json)
+
+
+ if __name__ == "__main__":
+     main()
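
Because main() defers `from .processing import run` until after _set_thread_env(), the OpenMP/BLAS variables are in place before any NumPy or Fortran code loads. A minimal sketch of the helpers above (import path taken from the header comment; the asserts only restate the documented behavior):

    import os

    from timesat_cli.__main__ import _platform, _set_thread_env, _validate_threads

    assert _validate_threads(None) is None                # leave config untouched
    assert _validate_threads(0) == (os.cpu_count() or 1)  # 0 -> all logical CPUs

    _set_thread_env(4, _platform())
    print(os.environ["OMP_NUM_THREADS"])  # "4"
    print(os.environ["MKL_NUM_THREADS"])  # "4", unless the caller set it already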
timesat_cli/config.py ADDED
@@ -0,0 +1,160 @@
+ from __future__ import annotations
+ import json
+ from dataclasses import dataclass
+ from typing import List, Sequence, Tuple
+ import numpy as np
+
+
+ @dataclass
+ class ClassParams:
+     landuse: int
+     p_fitmethod: int
+     p_smooth: float
+     p_nenvi: int
+     p_wfactnum: float
+     p_startmethod: int
+     p_startcutoff: Tuple[float, float]
+     p_low_percentile: float
+     p_fillbase: int
+     p_seasonmethod: int
+     p_seapar: float
+
+
+ @dataclass
+ class Settings:
+     s3env: str
+     image_file_list: str
+     quality_file_list: str
+     tv_list: str
+     lc_file: str
+     outputfolder: str
+     imwindow: Sequence[int]
+
+     p_band_id: int
+     p_ignoreday: int
+     p_ylu: np.ndarray
+     p_a: List[List[float]]
+     p_st_timestep: int
+     p_nodata: float
+     p_davailwin: int
+     p_outlier: int
+     p_printflag: int
+     max_memory_gb: float
+     scale: float
+     offset: float
+     p_hrvppformat: int
+     p_nclasses: int
+     classes: List[ClassParams]
+     outputvariables: int
+
+
+ @dataclass
+ class Config:
+     settings: Settings
+
+
+ def _as_array(value, dtype=float, fortran=False):
+     arr = np.array(value, dtype=dtype)
+     if fortran:
+         arr = np.asfortranarray(arr)
+     return arr
+
+
+ def load_config(jsfile: str) -> Config:
+     with open(jsfile, "r") as f:
+         data = json.load(f)
+
+     s = data["settings"]
+     nclasses = int(s["p_nclasses"]["value"])
+
+     classes: List[ClassParams] = []
+     for i in range(nclasses):
+         k = f"class{i+1}"
+         c = data[k]
+         classes.append(
+             ClassParams(
+                 landuse=int(c["landuse"]["value"]),
+                 p_fitmethod=int(c["p_fitmethod"]["value"]),
+                 p_smooth=float(_as_array(c["p_smooth"]["value"], dtype="double")),
+                 p_nenvi=int(c["p_nenvi"]["value"]),
+                 p_wfactnum=float(_as_array(c["p_wfactnum"]["value"], dtype="double")),
+                 p_startmethod=int(c["p_startmethod"]["value"]),
+                 p_startcutoff=tuple(_as_array(c["p_startcutoff"]["value"], dtype="double", fortran=True)),
+                 p_low_percentile=float(_as_array(c["p_low_percentile"]["value"], dtype="double")),
+                 p_fillbase=int(c["p_fillbase"]["value"]),
+                 p_seasonmethod=int(c["p_seasonmethod"]["value"]),
+                 p_seapar=float(_as_array(c["p_seapar"]["value"], dtype="double")),
+             )
+         )
+
+     settings = Settings(
+         s3env=s["s3env"]["value"],
+         image_file_list=s["image_file_list"]["value"],
+         quality_file_list=s["quality_file_list"]["value"],
+         tv_list=s["tv_list"]["value"],
+         lc_file=s["lc_file"]["value"],
+         outputfolder=s["outputfolder"]["value"],
+         imwindow=s["imwindow"]["value"],
+         p_band_id=int(s["p_band_id"]["value"]),
+         p_ignoreday=int(s["p_ignoreday"]["value"]),
+         p_ylu=_as_array(s["p_ylu"]["value"], dtype="double", fortran=True),
+         p_a=s["p_a"]["value"],
+         p_st_timestep=int(s["p_st_timestep"]["value"]),
+         p_nodata=float(s["p_nodata"]["value"]),
+         p_davailwin=int(s["p_davailwin"]["value"]),
+         p_outlier=int(s["p_outlier"]["value"]),
+         p_printflag=int(s["p_printflag"]["value"]),
+         max_memory_gb=float(s["max_memory_gb"]["value"]),
+         scale=float(s["scale"]["value"]),
+         offset=float(s["offset"]["value"]),
+         p_hrvppformat=int(s["p_hrvppformat"]["value"]),
+         outputvariables=int(s["outputvariables"]["value"]),
+         p_nclasses=nclasses,
+         classes=classes,
+     )
+
+     return Config(settings=settings)
+
+
+ def build_param_array(
+     s,
+     attr: str,
+     dtype,
+     size: int = 255,
+     shape: Tuple[int, ...] | None = None,
+     fortran_2d: bool = False,
+ ):
+     """
+     Build a parameter array for TIMESAT class settings.
+
+     Parameters
+     ----------
+     s : object
+         Settings container with a `classes` iterable.
+     attr : str
+         Attribute on each class object in `s.classes` (e.g., 'p_smooth').
+     dtype : numpy dtype or dtype string (e.g., 'uint8', 'double').
+     size : int
+         Length of the first dimension (TIMESAT expects 255).
+     shape : tuple[int, ...] | None
+         Extra trailing shape for per-class vectors (e.g., (2,) for p_startcutoff).
+     fortran_2d : bool
+         If True and `shape` is (n,) with n > 1, allocate (size, n) with order='F' to mirror the legacy layout.
+
+     Returns
+     -------
+     np.ndarray
+         Filled parameter array.
+     """
+     if shape is None:
+         arr = np.zeros(size, dtype=dtype)
+         for i, c in enumerate(s.classes):
+             arr[i] = getattr(c, attr)
+         return arr
+
+     full_shape = (size, *shape)
+     order = 'F' if fortran_2d and len(shape) == 1 and shape[0] > 1 else 'C'
+     arr = np.zeros(full_shape, dtype=dtype, order=order)
+     for i, c in enumerate(s.classes):
+         arr[i, ...] = getattr(c, attr)
+     return arr
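
As an illustration of build_param_array, the sketch below expands a single-class configuration into the fixed 255-slot arrays that the Fortran core expects. The Cfg container is a hypothetical stand-in for a loaded Settings object:

    from timesat_cli.config import ClassParams, build_param_array

    class Cfg:  # hypothetical stand-in; only the `classes` attribute matters
        classes = [
            ClassParams(landuse=1, p_fitmethod=2, p_smooth=1000.0, p_nenvi=3,
                        p_wfactnum=2.0, p_startmethod=1, p_startcutoff=(0.5, 0.5),
                        p_low_percentile=5.0, p_fillbase=0, p_seasonmethod=1,
                        p_seapar=1.0),
        ]

    fit = build_param_array(Cfg, "p_fitmethod", dtype="uint8")   # shape (255,)
    cut = build_param_array(Cfg, "p_startcutoff", dtype="double",
                            shape=(2,), fortran_2d=True)         # shape (255, 2), order='F'
    print(fit[0], cut[0])  # 2 [0.5 0.5]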
@@ -0,0 +1,62 @@
+ from __future__ import annotations
+ import os
+ from dotenv import load_dotenv
+ import boto3
+ from botocore.config import Config
+
+ __all__ = ["load_s3_config", "build_rasterio_s3_opts", "to_vsis3_paths"]
+
+ def load_s3_config() -> dict:
+     """
+     Load and validate S3 / CloudFerro configuration from environment variables.
+     Returns a dict with the validated values.
+     """
+     load_dotenv()  # default path
+
+     config = {
+         "AWS_ACCESS_KEY_ID": os.getenv("AWS_ACCESS_KEY_ID"),
+         "AWS_SECRET_ACCESS_KEY": os.getenv("AWS_SECRET_ACCESS_KEY"),
+         "AWS_SESSION_TOKEN": os.getenv("AWS_SESSION_TOKEN"),  # optional
+         "S3_BUCKET": os.getenv("S3_BUCKET"),
+         "ENDPOINT_URL": os.getenv("ENDPOINT_URL"),
+     }
+
+     required = [
+         config["AWS_ACCESS_KEY_ID"],
+         config["AWS_SECRET_ACCESS_KEY"],
+         config["S3_BUCKET"],
+         config["ENDPOINT_URL"],
+     ]
+
+     if not all(required):
+         raise RuntimeError(
+             "Missing required environment variables. "
+             "Check AWS_ACCESS_KEY_ID, AWS_SECRET_ACCESS_KEY, "
+             "S3_BUCKET, ENDPOINT_URL."
+         )
+
+     return config
+
+
+ def build_rasterio_s3_opts(cfg: dict):
+     """
+     Return a boto3 S3 client configured for the endpoint in `cfg`.
+     (The name is historical; this returns a client, not a dict of rasterio options.)
+     """
+     return boto3.client(
+         "s3",
+         endpoint_url=cfg["ENDPOINT_URL"],  # your S3-compatible endpoint
+         aws_access_key_id=cfg["AWS_ACCESS_KEY_ID"],
+         aws_secret_access_key=cfg["AWS_SECRET_ACCESS_KEY"],
+         aws_session_token=cfg.get("AWS_SESSION_TOKEN"),
+         config=Config(signature_version="s3v4", s3={"addressing_style": "path"}),
+     )
+
+
+ def to_vsis3_paths(s3, bucket: str, key: str, expires: int = 3600) -> str:
+     """Return a presigned GET URL for `bucket`/`key`, valid for `expires` seconds."""
+     return s3.generate_presigned_url(
+         "get_object",
+         Params={"Bucket": bucket, "Key": key},
+         ExpiresIn=expires,
+     )
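
Together, the three helpers cover the usual flow: load and validate credentials, build the client, and presign a GET URL. A sketch under assumptions (this module's file name is not shown in the diff, so the functions are assumed to be in scope; the object key is a placeholder):

    cfg = load_s3_config()            # raises RuntimeError if variables are missing
    s3 = build_rasterio_s3_opts(cfg)  # a boto3 S3 client, despite the name
    url = to_vsis3_paths(s3, cfg["S3_BUCKET"], "path/to/raster.tif")
    print(url)                        # presigned HTTPS URL, valid 3600 s by default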
timesat_cli/csvutils.py ADDED
@@ -0,0 +1,93 @@
+ # csvutils.py
+ from __future__ import annotations
+ from typing import Tuple, Iterable, Optional
+ import os
+ import numpy as np
+ import pandas as pd
+ import datetime as dt
+
+ __all__ = ["read_timeseries_csv", "write_timesat_csv_outputs"]
+
+ def _parse_time_column(col: Iterable[str | int]) -> np.ndarray:
+     """
+     Accepts YYYYDOY (e.g., 2020123), YYYYMMDD (e.g., 20200123) or ISO 'YYYY-MM-DD'.
+     Returns a uint32 vector in YYYYDOY.
+     """
+     out = []
+     for v in col:
+         s = str(v)
+         if len(s) == 7:  # YYYYDOY
+             # will raise if invalid
+             dt.datetime.strptime(s, "%Y%j")
+             out.append(int(s))
+         elif len(s) == 8 and s.isdigit():  # YYYYMMDD
+             d = dt.datetime.strptime(s, "%Y%m%d")
+             out.append(int(f"{d.year}{d.timetuple().tm_yday:03d}"))
+         else:  # try ISO
+             try:
+                 d = dt.datetime.strptime(s, "%Y-%m-%d")
+                 out.append(int(f"{d.year}{d.timetuple().tm_yday:03d}"))
+             except Exception as e:
+                 raise ValueError(f"Unrecognized date format: {s}") from e
+     return np.array(out, dtype="uint32")
+
+ def read_timeseries_csv(path: str) -> Tuple[np.ndarray, np.ndarray, np.ndarray]:
+     """
+     Read a per-site (single pixel) time series CSV.
+
+     Expected columns:
+       - 'time' : YYYYDOY or YYYYMMDD or YYYY-MM-DD
+       - 'vi'   : vegetation index values (float)
+       - 'qa'   : optional; quality or weights (float/int). If missing, set to 1.
+       - 'lc'   : optional; land cover code (int); accepted but currently unused here.
+
+     Returns:
+       vi         : array shaped (1, 1, T)
+       qa         : array shaped (1, 1, T)
+       timevector : 1-D uint32 YYYYDOY of length T
+     """
+     df = pd.read_csv(path)
+     if "time" not in df or "vi" not in df:
+         raise ValueError("CSV must contain at least 'time' and 'vi' columns.")
+     timevector = _parse_time_column(df["time"])
+     vi = df["vi"].to_numpy(dtype="float64")
+     qa = df["qa"].to_numpy(dtype="float64") if "qa" in df else np.ones_like(vi, dtype="float64")
+     # shape to (y=1, x=1, z=T)
+     vi = vi.reshape(1, 1, -1, order="F")
+     qa = qa.reshape(1, 1, -1, order="F")
+     return vi, qa, timevector
+
+ def write_timesat_csv_outputs(
+     out_folder: str,
+     timevector_out: np.ndarray,   # p_outindex dates in YYYYDOY
+     yfit: np.ndarray,             # shape (T_out,) for a single site
+     vpp: Optional[np.ndarray],    # shape (13*2*yr,) flattened for a single site
+     nseason: Optional[int],
+ ) -> None:
+     """
+     Writes up to three CSVs:
+       - yfit.csv    : columns [time (YYYYDOY), yfit]
+       - vpp.csv     : 13*2*yr parameters as columns VPP_1 ... VPP_N (skipped if vpp is None)
+       - nseason.csv : single row with nseason (skipped if nseason is None)
+     """
+     os.makedirs(out_folder, exist_ok=True)
+
+     # yfit
+     yfit_df = pd.DataFrame({
+         "time": timevector_out.astype("uint32"),
+         "yfit": yfit.astype("float64"),
+     })
+     yfit_df.to_csv(os.path.join(out_folder, "yfit.csv"), index=False)
+
+     # vpp
+     if vpp is not None:
+         vpp = vpp.ravel(order="F").astype("float64")
+         cols = [f"VPP_{i+1}" for i in range(vpp.size)]
+         vpp_df = pd.DataFrame([vpp], columns=cols)
+         vpp_df.to_csv(os.path.join(out_folder, "vpp.csv"), index=False)
+
+     # nseason
+     if nseason is not None:
+         pd.DataFrame({"nseason": [int(nseason)]}).to_csv(
+             os.path.join(out_folder, "nseason.csv"), index=False
+         )
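
A quick round trip through read_timeseries_csv, using a small hypothetical input file, shows the expected shapes and the YYYYDOY encoding:

    import pandas as pd

    from timesat_cli.csvutils import read_timeseries_csv

    pd.DataFrame({
        "time": ["2020-01-01", "2020-01-11", "2020-01-21"],
        "vi":   [0.21, 0.24, 0.30],
    }).to_csv("site.csv", index=False)

    vi, qa, tv = read_timeseries_csv("site.csv")
    print(vi.shape, qa.shape)  # (1, 1, 3) (1, 1, 3)
    print(tv)                  # [2020001 2020011 2020021], uint32 YYYYDOY
    # 'qa' was absent, so it defaults to all ones.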
@@ -0,0 +1,118 @@
+ """
+ Utility functions for handling date operations in TIMESAT processing.
+ """
+
+ from __future__ import annotations
+
+ import datetime
+ import numpy as np
+
+ __all__ = ["date_with_ignored_day", "generate_output_timeseries_dates"]
+
+
+ def is_leap_year(y: int) -> bool:
+     """
+     Return True if year y is a Gregorian leap year, False otherwise.
+     """
+     return (y % 4 == 0 and y % 100 != 0) or (y % 400 == 0)
+
+
+ def date_with_ignored_day(yrstart: int, i_tv: int, p_ignoreday: int) -> datetime.date:
+     """
+     Convert a synthetic TIMESAT time index (1-based, assuming 365 days/year)
+     into a real calendar date while skipping one day in leap years.
+     """
+
+     # Locate the index in the synthetic 365-day calendar
+     i = int(i_tv)
+     year_offset, doy_365 = divmod(i - 1, 365)
+     doy_365 += 1
+     year = yrstart + year_offset
+
+     jan1 = datetime.date(year, 1, 1)
+
+     if is_leap_year(year):
+         if not (1 <= p_ignoreday <= 366):
+             raise ValueError("p_ignoreday must be in [1, 366] for leap years")
+
+         if p_ignoreday == 1:
+             real_ordinal = doy_365 + 1
+         elif p_ignoreday == 366:
+             real_ordinal = doy_365
+         else:
+             real_ordinal = doy_365 if doy_365 < p_ignoreday else doy_365 + 1
+     else:
+         real_ordinal = doy_365
+
+     return jan1 + datetime.timedelta(days=real_ordinal - 1)
+
+
+ def build_monthly_sample_indices(yrstart: int, yr: int) -> np.ndarray:
+     """
+     Build a synthetic time index (1-based) for sampling the 1st, 11th, and 21st
+     of each month across multiple years.
+
+     The synthetic timeline always uses 365 days per year.
+     In leap years we:
+       - keep Feb 29
+       - drop Dec 31
+     so that each year still has 365 synthetic days.
+
+     Parameters
+     ----------
+     yrstart : int
+         Starting year of the period.
+
+     yr : int
+         Number of years to include.
+
+     Returns
+     -------
+     np.ndarray
+         A 1-D array of indices into the synthetic timeline (1-based).
+     """
+
+     indices: list[int] = []
+     year_offset = 0  # offset of each synthetic year start (0, 365, 730, ...)
+
+     for year in range(yrstart, yrstart + yr):
+         if is_leap_year(year):
+             # Include Feb 29, drop Dec 31
+             days_in_month = [31, 29, 31, 30, 31, 30, 31, 31, 30, 31, 30, 30]
+         else:
+             days_in_month = [31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31]
+
+         cum = 0  # cumulative day count within the current year
+
+         for dim in days_in_month:
+             for d in (1, 11, 21):
+                 if d <= dim:
+                     indices.append(year_offset + cum + d)
+             cum += dim
+
+         year_offset += 365
+
+     return np.array(indices, dtype=int)
+
+
+ def generate_output_timeseries_dates(p_st_timestep, yr, yrstart) -> tuple[np.ndarray, int]:
+     p_st_timestep = int(p_st_timestep)
+
+     if p_st_timestep > 0:
+         p_outindex = np.arange(1, yr * 365 + 1)[::p_st_timestep]
+     elif p_st_timestep < 0:
+         p_outindex = build_monthly_sample_indices(yrstart, yr)
+     else:  # p_st_timestep == 0
+         p_outindex = np.arange(1, yr * 365 + 1)[::9999]  # step > series length: only the first point
+
+     # HRVPP2 timestep: delete the first and last year from p_outindex
+     if p_st_timestep == -1:
+         first_year_end = 365
+         last_year_start = (yr - 1) * 365 + 1
+
+         # keep only indices that are NOT in year 1 and NOT in the last year
+         p_outindex = p_outindex[(p_outindex > first_year_end) & (p_outindex < last_year_start)]
+
+     p_outindex_num = len(p_outindex)
+
+     return p_outindex, p_outindex_num
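
To make the synthetic-calendar bookkeeping concrete: in a leap year with p_ignoreday=366, Feb 29 keeps its ordinal and Dec 31 is the day that disappears. A sketch (the module path is assumed, since this file's name is not shown in the diff):

    from timesat_cli.dateutils import (
        date_with_ignored_day,
        generate_output_timeseries_dates,
    )

    print(date_with_ignored_day(2020, 60, 366))   # 2020-02-29: Feb 29 is kept
    print(date_with_ignored_day(2020, 365, 366))  # 2020-12-30: Dec 31 is dropped

    idx, n = generate_output_timeseries_dates(10, yr=2, yrstart=2020)
    print(n, idx[:4])  # 73 [ 1 11 21 31] -- every 10th synthetic day over 2 years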
timesat_cli/fsutils.py ADDED
@@ -0,0 +1,64 @@
+ from __future__ import annotations
+ import os
+ import math
+
+ __all__ = ["create_output_folders", "close_all"]
+
+
+ def create_output_folders(outfolder: str) -> tuple[str, str]:
+     vpp_folder = os.path.join(outfolder, "VPP")
+     st_folder = os.path.join(outfolder, "ST")
+     os.makedirs(vpp_folder, exist_ok=True)
+     os.makedirs(st_folder, exist_ok=True)
+     return st_folder, vpp_folder
+
+
+ def memory_plan(
+     dx: int,
+     dy: int,
+     z: int,
+     p_outindex_num: int,
+     yr: int,
+     max_memory_gb: float,
+ ) -> tuple[int, int]:
+     num_layers = (
+         2 * z                  # VI + QA
+         + 2 * p_outindex_num   # yfit + yfit QA
+         + 2 * 13 * 2 * yr      # VPP + VPP QA
+         + yr                   # nseason
+     )
+
+     bytes_per = 8   # float64
+     safety = 0.6    # use at most 60% of the budget, leaving 40% headroom for overhead
+     max_bytes = max_memory_gb * (2 ** 30) * safety
+
+     dy_max = max_bytes / (dx * num_layers * bytes_per) if num_layers > 0 else dy
+     y_slice_size = int(min(math.floor(dy_max), dy)) if dy_max > 0 else dy
+     y_slice_size = max(1, y_slice_size)
+     num_block = int(math.ceil(dy / y_slice_size))
+     return y_slice_size, num_block
+
+
+ def close_all(*items):
+     """
+     Close datasets or other objects that have a .close() method.
+     Accepts individual objects and iterables (lists/tuples/etc.).
+     Ignores None safely.
+     """
+     for obj in items:
+         if obj is None:
+             continue
+
+         # If it's an iterable of objects (e.g. a list of datasets)
+         if isinstance(obj, (list, tuple, set)):
+             for x in obj:
+                 if x is None:
+                     continue
+                 close = getattr(x, "close", None)
+                 if callable(close):
+                     close()
+         else:
+             # Single object
+             close = getattr(obj, "close", None)
+             if callable(close):
+                 close()
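
The row-blocking arithmetic in memory_plan is easy to check by hand. For a hypothetical 10000 x 10000 scene with 100 input dates, 36 output points, and 2 years, num_layers = 2*100 + 2*36 + 2*13*2*2 + 2 = 378; 60% of an 8 GiB budget divided by 10000 * 378 * 8 bytes per row block gives about 170 rows per slice:

    from timesat_cli.fsutils import memory_plan

    rows, blocks = memory_plan(dx=10000, dy=10000, z=100,
                               p_outindex_num=36, yr=2, max_memory_gb=8.0)
    print(rows, blocks)  # 170 59 -> 170-row slices, processed in 59 blocks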